-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[DAGCombiner] Allow promoted constants in MULHU by power-of-2 -> SRL transform #170562
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner] Allow promoted constants in MULHU by power-of-2 -> SRL transform #170562
Conversation
|
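@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-selectiondag
Author: Valeriy Savchenko (SavchenkoValeriy)
Changes
Type legalization can promote constant operands. The MULHU optimization `mulhu x, (1 << c) -> x >> (bitwidth - c)` was failing when constants were promoted because:
1. The `isConstantOrConstantVector` check rejected promoted constants.
2. `BuildLogBase2` -> `takeInexpensiveLog2` -> `matchUnaryPredicate` rejected promoted constants.
This fixes both by adding `AllowTruncation=true`, following the pattern from the recent UDIV fix (#169491).
Full diff: https://github.com/llvm/llvm-project/pull/170562.diff
6 Files Affected: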
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 70950084ee6b7..ca48cdb2d28a3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
-// Do not permit build vector implicit truncation.
-static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+// Do not permit build vector implicit truncation unless AllowTruncation is set.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
+ bool AllowTruncation = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,8 +1077,13 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
- if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
- (Const->isOpaque() && NoOpaques))
+ if (!Const || (Const->isOpaque() && NoOpaques))
+ return false;
+ // When AllowTruncation is true, allow constants that have been promoted
+ // during type legalization as long as the value fits in the target type.
+ if ((AllowTruncation &&
+ Const->getAPIntValue().getActiveBits() > BitWidth) ||
+ (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
return false;
}
return true;
@@ -5322,7 +5328,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5336,7 +5343,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5352,7 +5360,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (isConstantOrConstantVector(N1) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+ /*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -5580,7 +5589,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return DAG.getConstant(0, DL, VT);
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true) &&
hasOperation(ISD::SRL, VT)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
unsigned NumEltBits = VT.getScalarSizeInBits();
@@ -29824,7 +29834,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
return false;
};
- if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
+ if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true)) {
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 783ec4b0bd211..e621f9d83a7b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
- const APInt& Divisor = C->getAPIntValue();
+ // Truncate the divisor to the target scalar type in case it was promoted
+ // during type legalization.
+ APInt Divisor = C->getAPIntValue().trunc(EltBits);
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
@@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
};
// Collect the shifts/magic values from each element.
- if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
diff --git a/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
new file mode 100644
index 0000000000000..46f994bfba9a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define <8 x i16> @mulhu_v8i16_by_256(<8 x i16> %x) {
+; CHECK-LABEL: mulhu_v8i16_by_256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.8h, v0.8h, #8
+; CHECK-NEXT: ret
+ %x32 = zext <8 x i16> %x to <8 x i32>
+ %mul = mul <8 x i32> %x32, splat (i32 256)
+ %result = lshr <8 x i32> %mul, splat (i32 16)
+ %trunc = trunc <8 x i32> %result to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <16 x i16> @mulhu_v16i16_by_256(<16 x i16> %x) {
+; CHECK-LABEL: mulhu_v16i16_by_256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.8h, v0.8h, #8
+; CHECK-NEXT: ushr v1.8h, v1.8h, #8
+; CHECK-NEXT: ret
+ %x32 = zext <16 x i16> %x to <16 x i32>
+ %mul = mul <16 x i32> %x32, splat (i32 256)
+ %result = lshr <16 x i32> %mul, splat (i32 16)
+ %trunc = trunc <16 x i32> %result to <16 x i16>
+ ret <16 x i16> %trunc
+}
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index a55aaeb62830f..ffaf045fa45c2 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -1433,35 +1433,13 @@ entry:
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_7:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #9362, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w13, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x12, w10, w8
-; CHECK-SD-NEXT: umull x14, w13, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x12, x12, #32
-; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
-; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w9, w9, w11
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w10, w10, w12
-; CHECK-SD-NEXT: lsr x9, x14, #32
-; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: add w8, w15, w8
-; CHECK-SD-NEXT: add w9, w13, w9
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #7
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_100:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
-; CHECK-SD-NEXT: mov w14, #100 // =0x64
+; CHECK-SD-NEXT: mov w8, #656 // =0x290
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #655, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w12, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x13, w10, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: lsr x13, x13, #32
-; CHECK-SD-NEXT: msub w9, w11, w14, w9
-; CHECK-SD-NEXT: umull x11, w12, w8
-; CHECK-SD-NEXT: msub w10, w13, w14, w10
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x9, x11, #32
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: msub w9, w9, w14, w12
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: msub w8, w8, w14, w15
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #100
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..cdd238cdd81ff
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test verifies that udiv by constant works correctly even when type
+; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
+; This is a regression test for a bug where v16i16 would be split into two
+; v8i16 operations during legalization, the i16 constants would be promoted
+; to i32, and then the second DAGCombine round would fail to recognize the
+; promoted constants when trying to convert udiv into mul+shift.
+
+define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: udiv_v8i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ret
+ %div = udiv <8 x i16> %x, splat (i16 255)
+ ret <8 x i16> %div
+}
+
+define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: udiv_v16i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ushr v1.8h, v1.8h, #7
+; CHECK-NEXT: ret
+ %div = udiv <16 x i16> %x, splat (i16 255)
+ ret <16 x i16> %div
+}
+
+define <8 x i16> @urem_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: urem_v8i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-NEXT: ushr v1.8h, v1.8h, #7
+; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: ret
+ %rem = urem <8 x i16> %x, splat (i16 255)
+ ret <8 x i16> %rem
+}
+
+define <16 x i16> @urem_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: urem_v16i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: umull v4.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull2 v5.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v2.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v3.8h, v4.8h, v3.8h
+; CHECK-NEXT: movi v4.2d, #0xff00ff00ff00ff
+; CHECK-NEXT: uzp2 v2.8h, v2.8h, v5.8h
+; CHECK-NEXT: ushr v3.8h, v3.8h, #7
+; CHECK-NEXT: ushr v2.8h, v2.8h, #7
+; CHECK-NEXT: mls v0.8h, v3.8h, v4.8h
+; CHECK-NEXT: mls v1.8h, v2.8h, v4.8h
+; CHECK-NEXT: ret
+ %rem = urem <16 x i16> %x, splat (i16 255)
+ ret <16 x i16> %rem
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
index 3fd7f5be860cf..c0c9b1797f91f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
@@ -48,18 +48,11 @@ define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
}
define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_1(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv1i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv1i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv1i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 1 x i32> splat (i32 16) to <vscale x 1 x i64>
%vc = zext <vscale x 1 x i32> %va to <vscale x 1 x i64>
%vd = mul <vscale x 1 x i64> %vb, %vc
@@ -114,18 +107,11 @@ define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
}
define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_1(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv2i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv2i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv2i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 2 x i32> splat (i32 16) to <vscale x 2 x i64>
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = mul <vscale x 2 x i64> %vb, %vc
@@ -180,18 +166,11 @@ define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
}
define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_1(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv4i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv4i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv4i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 4 x i32> splat (i32 16) to <vscale x 4 x i64>
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = mul <vscale x 4 x i64> %vb, %vc
@@ -246,18 +225,11 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
}
define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv8i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv8i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv8i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 8 x i32> splat (i32 16) to <vscale x 8 x i64>
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = mul <vscale x 8 x i64> %vb, %vc
@@ -265,3 +237,6 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
%vf = trunc <vscale x 8 x i64> %ve to <vscale x 8 x i32>
ret <vscale x 8 x i32> %vf
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
|
|
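@llvm/pr-subscribers-backend-aarch64
Author: Valeriy Savchenko (SavchenkoValeriy)
Changes
Type legalization can promote constant operands. The MULHU optimization `mulhu x, (1 << c) -> x >> (bitwidth - c)` was failing when constants were promoted because:
1. The `isConstantOrConstantVector` check rejected promoted constants.
2. `BuildLogBase2` -> `takeInexpensiveLog2` -> `matchUnaryPredicate` rejected promoted constants.
This fixes both by adding `AllowTruncation=true`, following the pattern from the recent UDIV fix (#169491).
Full diff: https://github.com/llvm/llvm-project/pull/170562.diff
6 Files Affected: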
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 70950084ee6b7..ca48cdb2d28a3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
-// Do not permit build vector implicit truncation.
-static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+// Do not permit build vector implicit truncation unless AllowTruncation is set.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
+ bool AllowTruncation = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,8 +1077,13 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
- if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
- (Const->isOpaque() && NoOpaques))
+ if (!Const || (Const->isOpaque() && NoOpaques))
+ return false;
+ // When AllowTruncation is true, allow constants that have been promoted
+ // during type legalization as long as the value fits in the target type.
+ if ((AllowTruncation &&
+ Const->getAPIntValue().getActiveBits() > BitWidth) ||
+ (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
return false;
}
return true;
@@ -5322,7 +5328,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5336,7 +5343,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5352,7 +5360,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (isConstantOrConstantVector(N1) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+ /*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -5580,7 +5589,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return DAG.getConstant(0, DL, VT);
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true) &&
hasOperation(ISD::SRL, VT)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
unsigned NumEltBits = VT.getScalarSizeInBits();
@@ -29824,7 +29834,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
return false;
};
- if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
+ if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true)) {
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 783ec4b0bd211..e621f9d83a7b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
- const APInt& Divisor = C->getAPIntValue();
+ // Truncate the divisor to the target scalar type in case it was promoted
+ // during type legalization.
+ APInt Divisor = C->getAPIntValue().trunc(EltBits);
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
@@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
};
// Collect the shifts/magic values from each element.
- if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
diff --git a/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
new file mode 100644
index 0000000000000..46f994bfba9a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define <8 x i16> @mulhu_v8i16_by_256(<8 x i16> %x) {
+; CHECK-LABEL: mulhu_v8i16_by_256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.8h, v0.8h, #8
+; CHECK-NEXT: ret
+ %x32 = zext <8 x i16> %x to <8 x i32>
+ %mul = mul <8 x i32> %x32, splat (i32 256)
+ %result = lshr <8 x i32> %mul, splat (i32 16)
+ %trunc = trunc <8 x i32> %result to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <16 x i16> @mulhu_v16i16_by_256(<16 x i16> %x) {
+; CHECK-LABEL: mulhu_v16i16_by_256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.8h, v0.8h, #8
+; CHECK-NEXT: ushr v1.8h, v1.8h, #8
+; CHECK-NEXT: ret
+ %x32 = zext <16 x i16> %x to <16 x i32>
+ %mul = mul <16 x i32> %x32, splat (i32 256)
+ %result = lshr <16 x i32> %mul, splat (i32 16)
+ %trunc = trunc <16 x i32> %result to <16 x i16>
+ ret <16 x i16> %trunc
+}
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index a55aaeb62830f..ffaf045fa45c2 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -1433,35 +1433,13 @@ entry:
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_7:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #9362, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w13, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x12, w10, w8
-; CHECK-SD-NEXT: umull x14, w13, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x12, x12, #32
-; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
-; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w9, w9, w11
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w10, w10, w12
-; CHECK-SD-NEXT: lsr x9, x14, #32
-; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: add w8, w15, w8
-; CHECK-SD-NEXT: add w9, w13, w9
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #7
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_100:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
-; CHECK-SD-NEXT: mov w14, #100 // =0x64
+; CHECK-SD-NEXT: mov w8, #656 // =0x290
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #655, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w12, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x13, w10, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: lsr x13, x13, #32
-; CHECK-SD-NEXT: msub w9, w11, w14, w9
-; CHECK-SD-NEXT: umull x11, w12, w8
-; CHECK-SD-NEXT: msub w10, w13, w14, w10
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x9, x11, #32
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: msub w9, w9, w14, w12
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: msub w8, w8, w14, w15
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #100
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..cdd238cdd81ff
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test verifies that udiv by constant works correctly even when type
+; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
+; This is a regression test for a bug where v16i16 would be split into two
+; v8i16 operations during legalization, the i16 constants would be promoted
+; to i32, and then the second DAGCombine round would fail to recognize the
+; promoted constants when trying to convert udiv into mul+shift.
+
+define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: udiv_v8i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ret
+ %div = udiv <8 x i16> %x, splat (i16 255)
+ ret <8 x i16> %div
+}
+
+define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: udiv_v16i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ushr v1.8h, v1.8h, #7
+; CHECK-NEXT: ret
+ %div = udiv <16 x i16> %x, splat (i16 255)
+ ret <16 x i16> %div
+}
+
+define <8 x i16> @urem_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: urem_v8i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-NEXT: ushr v1.8h, v1.8h, #7
+; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: ret
+ %rem = urem <8 x i16> %x, splat (i16 255)
+ ret <8 x i16> %rem
+}
+
+define <16 x i16> @urem_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: urem_v16i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: umull v4.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull2 v5.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v2.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v3.8h, v4.8h, v3.8h
+; CHECK-NEXT: movi v4.2d, #0xff00ff00ff00ff
+; CHECK-NEXT: uzp2 v2.8h, v2.8h, v5.8h
+; CHECK-NEXT: ushr v3.8h, v3.8h, #7
+; CHECK-NEXT: ushr v2.8h, v2.8h, #7
+; CHECK-NEXT: mls v0.8h, v3.8h, v4.8h
+; CHECK-NEXT: mls v1.8h, v2.8h, v4.8h
+; CHECK-NEXT: ret
+ %rem = urem <16 x i16> %x, splat (i16 255)
+ ret <16 x i16> %rem
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
index 3fd7f5be860cf..c0c9b1797f91f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
@@ -48,18 +48,11 @@ define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
}
define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_1(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv1i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv1i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv1i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 1 x i32> splat (i32 16) to <vscale x 1 x i64>
%vc = zext <vscale x 1 x i32> %va to <vscale x 1 x i64>
%vd = mul <vscale x 1 x i64> %vb, %vc
@@ -114,18 +107,11 @@ define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
}
define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_1(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv2i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv2i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv2i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 2 x i32> splat (i32 16) to <vscale x 2 x i64>
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = mul <vscale x 2 x i64> %vb, %vc
@@ -180,18 +166,11 @@ define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
}
define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_1(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv4i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv4i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv4i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 4 x i32> splat (i32 16) to <vscale x 4 x i64>
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = mul <vscale x 4 x i64> %vb, %vc
@@ -246,18 +225,11 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
}
define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv8i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv8i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv8i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 8 x i32> splat (i32 16) to <vscale x 8 x i64>
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = mul <vscale x 8 x i64> %vb, %vc
@@ -265,3 +237,6 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
%vf = trunc <vscale x 8 x i64> %ve to <vscale x 8 x i32>
ret <vscale x 8 x i32> %vf
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
|
6ca289b to
55e24d9
Compare
…transform (llvm#170562)
Type legalization can promote constant operands. The MULHU optimization `mulhu x, (1 << c) -> x >> (bitwidth - c)` was failing when constants were promoted because:
1. `isConstantOrConstantVector` check rejected promoted constants
2. `BuildLogBase2` -> `takeInexpensiveLog2` -> `matchUnaryPredicate` rejected promoted constants
This fixes both by adding `AllowTruncation=true`, following the pattern from the recent UDIV fix (llvm#169491).
Type legalization can promote constant operands. The MULHU optimization `mulhu x, (1 << c) -> x >> (bitwidth - c)` was failing when constants were promoted because:
1. The `isConstantOrConstantVector` check rejected promoted constants.
2. `BuildLogBase2` -> `takeInexpensiveLog2` -> `matchUnaryPredicate` rejected promoted constants.
This fixes both by adding `AllowTruncation=true`, following the pattern from the recent UDIV fix (#169491).