Skip to content

Commit af39d95

Browse files
SavchenkoValeriy authored and kcloudy0717 committed
[DAGCombiner] Allow promoted constants in MULHU by power-of-2 -> SRL transform (llvm#170562)
Type legalization can promote constant operands. The MULHU optimization `mulhu x, (1 << c) -> x >> (bitwidth - c)` was failing when constants were promoted because:

1. The `isConstantOrConstantVector` check rejected promoted constants.
2. `BuildLogBase2` -> `takeInexpensiveLog2` -> `matchUnaryPredicate` rejected promoted constants.

This fixes both by adding `AllowTruncation=true`, following the pattern from the recent UDIV fix (llvm#169491).
1 parent 48e75e4 commit af39d95

File tree

3 files changed

+54
-50
lines changed

3 files changed

+54
-50
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5591,7 +5591,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
55915591
return DAG.getConstant(0, DL, VT);
55925592

55935593
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5594-
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5594+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5595+
/*AllowTruncation=*/true) &&
55955596
hasOperation(ISD::SRL, VT)) {
55965597
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
55975598
unsigned NumEltBits = VT.getScalarSizeInBits();
@@ -29835,7 +29836,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2983529836
return false;
2983629837
};
2983729838

29838-
if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
29839+
if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false,
29840+
/*AllowTruncation=*/true)) {
2983929841
if (!VT.isVector())
2984029842
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
2984129843
// We need to create a build vector
Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
3+
4+
define <8 x i16> @mulhu_v8i16_by_256(<8 x i16> %x) {
5+
; CHECK-LABEL: mulhu_v8i16_by_256:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: ushr v0.8h, v0.8h, #8
8+
; CHECK-NEXT: ret
9+
%x32 = zext <8 x i16> %x to <8 x i32>
10+
%mul = mul <8 x i32> %x32, splat (i32 256)
11+
%result = lshr <8 x i32> %mul, splat (i32 16)
12+
%trunc = trunc <8 x i32> %result to <8 x i16>
13+
ret <8 x i16> %trunc
14+
}
15+
16+
define <16 x i16> @mulhu_v16i16_by_256(<16 x i16> %x) {
17+
; CHECK-LABEL: mulhu_v16i16_by_256:
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: ushr v0.8h, v0.8h, #8
20+
; CHECK-NEXT: ushr v1.8h, v1.8h, #8
21+
; CHECK-NEXT: ret
22+
%x32 = zext <16 x i16> %x to <16 x i32>
23+
%mul = mul <16 x i32> %x32, splat (i32 256)
24+
%result = lshr <16 x i32> %mul, splat (i32 16)
25+
%trunc = trunc <16 x i32> %result to <16 x i16>
26+
ret <16 x i16> %trunc
27+
}

llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll

Lines changed: 23 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -48,18 +48,11 @@ define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
4848
}
4949

5050
define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_1(<vscale x 1 x i32> %va) {
51-
; RV32-LABEL: vmulhu_vi_nxv1i32_1:
52-
; RV32: # %bb.0:
53-
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
54-
; RV32-NEXT: vsrl.vi v8, v8, 28
55-
; RV32-NEXT: ret
56-
;
57-
; RV64-LABEL: vmulhu_vi_nxv1i32_1:
58-
; RV64: # %bb.0:
59-
; RV64-NEXT: li a0, 16
60-
; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
61-
; RV64-NEXT: vmulhu.vx v8, v8, a0
62-
; RV64-NEXT: ret
51+
; CHECK-LABEL: vmulhu_vi_nxv1i32_1:
52+
; CHECK: # %bb.0:
53+
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
54+
; CHECK-NEXT: vsrl.vi v8, v8, 28
55+
; CHECK-NEXT: ret
6356
%vb = zext <vscale x 1 x i32> splat (i32 16) to <vscale x 1 x i64>
6457
%vc = zext <vscale x 1 x i32> %va to <vscale x 1 x i64>
6558
%vd = mul <vscale x 1 x i64> %vb, %vc
@@ -114,18 +107,11 @@ define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
114107
}
115108

116109
define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_1(<vscale x 2 x i32> %va) {
117-
; RV32-LABEL: vmulhu_vi_nxv2i32_1:
118-
; RV32: # %bb.0:
119-
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
120-
; RV32-NEXT: vsrl.vi v8, v8, 28
121-
; RV32-NEXT: ret
122-
;
123-
; RV64-LABEL: vmulhu_vi_nxv2i32_1:
124-
; RV64: # %bb.0:
125-
; RV64-NEXT: li a0, 16
126-
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
127-
; RV64-NEXT: vmulhu.vx v8, v8, a0
128-
; RV64-NEXT: ret
110+
; CHECK-LABEL: vmulhu_vi_nxv2i32_1:
111+
; CHECK: # %bb.0:
112+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
113+
; CHECK-NEXT: vsrl.vi v8, v8, 28
114+
; CHECK-NEXT: ret
129115
%vb = zext <vscale x 2 x i32> splat (i32 16) to <vscale x 2 x i64>
130116
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
131117
%vd = mul <vscale x 2 x i64> %vb, %vc
@@ -180,18 +166,11 @@ define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
180166
}
181167

182168
define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_1(<vscale x 4 x i32> %va) {
183-
; RV32-LABEL: vmulhu_vi_nxv4i32_1:
184-
; RV32: # %bb.0:
185-
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
186-
; RV32-NEXT: vsrl.vi v8, v8, 28
187-
; RV32-NEXT: ret
188-
;
189-
; RV64-LABEL: vmulhu_vi_nxv4i32_1:
190-
; RV64: # %bb.0:
191-
; RV64-NEXT: li a0, 16
192-
; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
193-
; RV64-NEXT: vmulhu.vx v8, v8, a0
194-
; RV64-NEXT: ret
169+
; CHECK-LABEL: vmulhu_vi_nxv4i32_1:
170+
; CHECK: # %bb.0:
171+
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
172+
; CHECK-NEXT: vsrl.vi v8, v8, 28
173+
; CHECK-NEXT: ret
195174
%vb = zext <vscale x 4 x i32> splat (i32 16) to <vscale x 4 x i64>
196175
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
197176
%vd = mul <vscale x 4 x i64> %vb, %vc
@@ -246,22 +225,18 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
246225
}
247226

248227
define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
249-
; RV32-LABEL: vmulhu_vi_nxv8i32_1:
250-
; RV32: # %bb.0:
251-
; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
252-
; RV32-NEXT: vsrl.vi v8, v8, 28
253-
; RV32-NEXT: ret
254-
;
255-
; RV64-LABEL: vmulhu_vi_nxv8i32_1:
256-
; RV64: # %bb.0:
257-
; RV64-NEXT: li a0, 16
258-
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
259-
; RV64-NEXT: vmulhu.vx v8, v8, a0
260-
; RV64-NEXT: ret
228+
; CHECK-LABEL: vmulhu_vi_nxv8i32_1:
229+
; CHECK: # %bb.0:
230+
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
231+
; CHECK-NEXT: vsrl.vi v8, v8, 28
232+
; CHECK-NEXT: ret
261233
%vb = zext <vscale x 8 x i32> splat (i32 16) to <vscale x 8 x i64>
262234
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
263235
%vd = mul <vscale x 8 x i64> %vb, %vc
264236
%ve = lshr <vscale x 8 x i64> %vd, splat (i64 32)
265237
%vf = trunc <vscale x 8 x i64> %ve to <vscale x 8 x i32>
266238
ret <vscale x 8 x i32> %vf
267239
}
240+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
241+
; RV32: {{.*}}
242+
; RV64: {{.*}}

0 commit comments

Comments
 (0)