Skip to content

Commit 4c6b882

Browse files
authored
[DAG] Fold mul 0 -> 0 when expanding mul into parts. (#168780)
If the upper bits are zero but we expand a multiply and then immediately convert it into a libcall, there is no opportunity to optimize away the mul. Fold the multiply by zero in getNode so that extending multiplies optimize cleanly.
1 parent c9ebc89 commit 4c6b882

File tree

6 files changed

+98
-139
lines changed

6 files changed

+98
-139
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7656,6 +7656,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
76567656
N1.getValueType() == VT && "Binary operator types must match!");
76577657
if (VT.getScalarType() == MVT::i1)
76587658
return getNode(ISD::AND, DL, VT, N1, N2);
7659+
if (N2CV && N2CV->isZero())
7660+
return N2;
76597661
if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
76607662
const APInt &MulImm = N1->getConstantOperandAPInt(0);
76617663
const APInt &N2CImm = N2C->getAPIntValue();

llvm/test/CodeGen/AArch64/combine-sdiv.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,8 +1673,9 @@ define i32 @combine_i32_sdiv_const100(i32 %x) {
16731673
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
16741674
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
16751675
; CHECK-SD-NEXT: smull x8, w0, w8
1676-
; CHECK-SD-NEXT: asr x8, x8, #37
1677-
; CHECK-SD-NEXT: add w0, w8, w8, lsr #31
1676+
; CHECK-SD-NEXT: asr x9, x8, #37
1677+
; CHECK-SD-NEXT: add x0, x9, x8, lsr #63
1678+
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 killed $x0
16781679
; CHECK-SD-NEXT: ret
16791680
;
16801681
; CHECK-GI-LABEL: combine_i32_sdiv_const100:

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,11 @@ define i32 @si32_100(i32 %a, i32 %b) {
279279
; CHECK-SD-LABEL: si32_100:
280280
; CHECK-SD: // %bb.0: // %entry
281281
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
282-
; CHECK-SD-NEXT: mov w9, #100 // =0x64
283282
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
284283
; CHECK-SD-NEXT: smull x8, w0, w8
285-
; CHECK-SD-NEXT: asr x8, x8, #37
286-
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
284+
; CHECK-SD-NEXT: asr x9, x8, #37
285+
; CHECK-SD-NEXT: add x8, x9, x8, lsr #63
286+
; CHECK-SD-NEXT: mov w9, #100 // =0x64
287287
; CHECK-SD-NEXT: msub w0, w8, w9, w0
288288
; CHECK-SD-NEXT: ret
289289
;
@@ -853,22 +853,22 @@ define <3 x i8> @sv3i8_100(<3 x i8> %d, <3 x i8> %e) {
853853
; CHECK-SD-NEXT: sxtb x10, w1
854854
; CHECK-SD-NEXT: movk w9, #20971, lsl #16
855855
; CHECK-SD-NEXT: sxtb x11, w2
856-
; CHECK-SD-NEXT: sxtb w12, w0
856+
; CHECK-SD-NEXT: sxtb w13, w0
857857
; CHECK-SD-NEXT: smull x8, w8, w9
858858
; CHECK-SD-NEXT: smull x10, w10, w9
859859
; CHECK-SD-NEXT: smull x9, w11, w9
860-
; CHECK-SD-NEXT: mov w11, #100 // =0x64
861-
; CHECK-SD-NEXT: asr x8, x8, #37
862-
; CHECK-SD-NEXT: asr x10, x10, #37
863-
; CHECK-SD-NEXT: asr x9, x9, #37
864-
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
865-
; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
866-
; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
867-
; CHECK-SD-NEXT: msub w0, w8, w11, w12
860+
; CHECK-SD-NEXT: asr x11, x8, #37
861+
; CHECK-SD-NEXT: asr x12, x10, #37
862+
; CHECK-SD-NEXT: add x8, x11, x8, lsr #63
863+
; CHECK-SD-NEXT: asr x11, x9, #37
864+
; CHECK-SD-NEXT: add x10, x12, x10, lsr #63
865+
; CHECK-SD-NEXT: mov w12, #100 // =0x64
866+
; CHECK-SD-NEXT: add x9, x11, x9, lsr #63
867+
; CHECK-SD-NEXT: msub w0, w8, w12, w13
868868
; CHECK-SD-NEXT: sxtb w8, w1
869-
; CHECK-SD-NEXT: msub w1, w10, w11, w8
869+
; CHECK-SD-NEXT: msub w1, w10, w12, w8
870870
; CHECK-SD-NEXT: sxtb w8, w2
871-
; CHECK-SD-NEXT: msub w2, w9, w11, w8
871+
; CHECK-SD-NEXT: msub w2, w9, w12, w8
872872
; CHECK-SD-NEXT: ret
873873
;
874874
; CHECK-GI-LABEL: sv3i8_100:
@@ -1733,23 +1733,23 @@ define <3 x i16> @sv3i16_100(<3 x i16> %d, <3 x i16> %e) {
17331733
; CHECK-SD-NEXT: smov x10, v0.h[1]
17341734
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
17351735
; CHECK-SD-NEXT: smov x11, v0.h[2]
1736-
; CHECK-SD-NEXT: mov w12, #100 // =0x64
1737-
; CHECK-SD-NEXT: smov w13, v0.h[1]
17381736
; CHECK-SD-NEXT: smull x9, w9, w8
17391737
; CHECK-SD-NEXT: smull x10, w10, w8
17401738
; CHECK-SD-NEXT: smull x8, w11, w8
17411739
; CHECK-SD-NEXT: smov w11, v0.h[0]
1742-
; CHECK-SD-NEXT: asr x9, x9, #37
1743-
; CHECK-SD-NEXT: asr x10, x10, #37
1744-
; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
1745-
; CHECK-SD-NEXT: asr x8, x8, #37
1746-
; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
1740+
; CHECK-SD-NEXT: asr x12, x9, #37
1741+
; CHECK-SD-NEXT: asr x13, x10, #37
1742+
; CHECK-SD-NEXT: add x9, x12, x9, lsr #63
1743+
; CHECK-SD-NEXT: mov w12, #100 // =0x64
1744+
; CHECK-SD-NEXT: add x10, x13, x10, lsr #63
1745+
; CHECK-SD-NEXT: smov w13, v0.h[1]
17471746
; CHECK-SD-NEXT: msub w9, w9, w12, w11
1748-
; CHECK-SD-NEXT: smov w11, v0.h[2]
1749-
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
1747+
; CHECK-SD-NEXT: asr x11, x8, #37
17501748
; CHECK-SD-NEXT: msub w10, w10, w12, w13
1751-
; CHECK-SD-NEXT: msub w8, w8, w12, w11
1749+
; CHECK-SD-NEXT: add x8, x11, x8, lsr #63
1750+
; CHECK-SD-NEXT: smov w11, v0.h[2]
17521751
; CHECK-SD-NEXT: fmov s0, w9
1752+
; CHECK-SD-NEXT: msub w8, w8, w12, w11
17531753
; CHECK-SD-NEXT: mov v0.h[1], w10
17541754
; CHECK-SD-NEXT: mov v0.h[2], w8
17551755
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -2387,12 +2387,12 @@ define <3 x i32> @sv3i32_100(<3 x i32> %d, <3 x i32> %e) {
23872387
; CHECK-SD-NEXT: mov w9, v0.s[2]
23882388
; CHECK-SD-NEXT: movi v2.2s, #100
23892389
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
2390-
; CHECK-SD-NEXT: mov w10, #100 // =0x64
23912390
; CHECK-SD-NEXT: dup v1.2s, w8
23922391
; CHECK-SD-NEXT: smull x8, w9, w8
23932392
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
2394-
; CHECK-SD-NEXT: asr x8, x8, #37
2395-
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
2393+
; CHECK-SD-NEXT: asr x10, x8, #37
2394+
; CHECK-SD-NEXT: add x8, x10, x8, lsr #63
2395+
; CHECK-SD-NEXT: mov w10, #100 // =0x64
23962396
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37
23972397
; CHECK-SD-NEXT: msub w8, w8, w10, w9
23982398
; CHECK-SD-NEXT: xtn v1.2s, v1.2d

llvm/test/CodeGen/AArch64/srem-lkk.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ define i32 @fold_srem_positive_even(i32 %x) {
2323
; CHECK-LABEL: fold_srem_positive_even:
2424
; CHECK: // %bb.0:
2525
; CHECK-NEXT: mov w8, #36849 // =0x8ff1
26-
; CHECK-NEXT: mov w9, #1060 // =0x424
2726
; CHECK-NEXT: movk w8, #15827, lsl #16
2827
; CHECK-NEXT: smull x8, w0, w8
29-
; CHECK-NEXT: asr x8, x8, #40
30-
; CHECK-NEXT: add w8, w8, w8, lsr #31
28+
; CHECK-NEXT: asr x9, x8, #40
29+
; CHECK-NEXT: add x8, x9, x8, lsr #63
30+
; CHECK-NEXT: mov w9, #1060 // =0x424
3131
; CHECK-NEXT: msub w0, w8, w9, w0
3232
; CHECK-NEXT: ret
3333
%1 = srem i32 %x, 1060
@@ -39,11 +39,11 @@ define i32 @fold_srem_negative_odd(i32 %x) {
3939
; CHECK-LABEL: fold_srem_negative_odd:
4040
; CHECK: // %bb.0:
4141
; CHECK-NEXT: mov w8, #65445 // =0xffa5
42-
; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d
4342
; CHECK-NEXT: movk w8, #42330, lsl #16
4443
; CHECK-NEXT: smull x8, w0, w8
45-
; CHECK-NEXT: asr x8, x8, #40
46-
; CHECK-NEXT: add w8, w8, w8, lsr #31
44+
; CHECK-NEXT: asr x9, x8, #40
45+
; CHECK-NEXT: add x8, x9, x8, lsr #63
46+
; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d
4747
; CHECK-NEXT: msub w0, w8, w9, w0
4848
; CHECK-NEXT: ret
4949
%1 = srem i32 %x, -723
@@ -55,11 +55,11 @@ define i32 @fold_srem_negative_even(i32 %x) {
5555
; CHECK-LABEL: fold_srem_negative_even:
5656
; CHECK: // %bb.0:
5757
; CHECK-NEXT: mov w8, #62439 // =0xf3e7
58-
; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b
5958
; CHECK-NEXT: movk w8, #64805, lsl #16
6059
; CHECK-NEXT: smull x8, w0, w8
61-
; CHECK-NEXT: asr x8, x8, #40
62-
; CHECK-NEXT: add w8, w8, w8, lsr #31
60+
; CHECK-NEXT: asr x9, x8, #40
61+
; CHECK-NEXT: add x8, x9, x8, lsr #63
62+
; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b
6363
; CHECK-NEXT: msub w0, w8, w9, w0
6464
; CHECK-NEXT: ret
6565
%1 = srem i32 %x, -22981

llvm/test/CodeGen/RISCV/mul.ll

Lines changed: 25 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1829,67 +1829,53 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
18291829
; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
18301830
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
18311831
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
1832-
; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
1833-
; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill
1834-
; RV32I-NEXT: mv s2, a3
1835-
; RV32I-NEXT: mv s3, a2
1836-
; RV32I-NEXT: mv s0, a1
1837-
; RV32I-NEXT: mv s1, a0
1832+
; RV32I-NEXT: mv s0, a3
1833+
; RV32I-NEXT: mv s1, a2
1834+
; RV32I-NEXT: mv s2, a1
1835+
; RV32I-NEXT: mv s3, a0
18381836
; RV32I-NEXT: srai s4, a3, 31
18391837
; RV32I-NEXT: li a1, 0
18401838
; RV32I-NEXT: li a3, 0
18411839
; RV32I-NEXT: call __muldi3
18421840
; RV32I-NEXT: mv s5, a1
1843-
; RV32I-NEXT: mv a0, s0
1841+
; RV32I-NEXT: mv a0, s2
18441842
; RV32I-NEXT: li a1, 0
1845-
; RV32I-NEXT: mv a2, s3
1843+
; RV32I-NEXT: mv a2, s1
18461844
; RV32I-NEXT: li a3, 0
18471845
; RV32I-NEXT: call __muldi3
18481846
; RV32I-NEXT: add s5, a0, s5
18491847
; RV32I-NEXT: sltu a0, s5, a0
1850-
; RV32I-NEXT: add s7, a1, a0
1851-
; RV32I-NEXT: mv a0, s1
1848+
; RV32I-NEXT: add s6, a1, a0
1849+
; RV32I-NEXT: mv a0, s3
18521850
; RV32I-NEXT: li a1, 0
1853-
; RV32I-NEXT: mv a2, s2
1851+
; RV32I-NEXT: mv a2, s0
18541852
; RV32I-NEXT: li a3, 0
18551853
; RV32I-NEXT: call __muldi3
18561854
; RV32I-NEXT: add s5, a0, s5
18571855
; RV32I-NEXT: sltu a0, s5, a0
18581856
; RV32I-NEXT: add a0, a1, a0
1859-
; RV32I-NEXT: add s8, s7, a0
1860-
; RV32I-NEXT: mv a0, s0
1857+
; RV32I-NEXT: add s5, s6, a0
1858+
; RV32I-NEXT: mv a0, s2
18611859
; RV32I-NEXT: li a1, 0
1862-
; RV32I-NEXT: mv a2, s2
1860+
; RV32I-NEXT: mv a2, s0
18631861
; RV32I-NEXT: li a3, 0
18641862
; RV32I-NEXT: call __muldi3
1865-
; RV32I-NEXT: mv s5, a0
1866-
; RV32I-NEXT: mv s6, a1
1867-
; RV32I-NEXT: add s9, a0, s8
1868-
; RV32I-NEXT: mv a0, s3
1869-
; RV32I-NEXT: mv a1, s2
1870-
; RV32I-NEXT: li a2, 0
1871-
; RV32I-NEXT: li a3, 0
1872-
; RV32I-NEXT: call __muldi3
1873-
; RV32I-NEXT: mv s2, a0
1874-
; RV32I-NEXT: mv s3, a1
1863+
; RV32I-NEXT: mv s0, a0
1864+
; RV32I-NEXT: mv s1, a1
1865+
; RV32I-NEXT: add s7, a0, s5
18751866
; RV32I-NEXT: mv a0, s4
18761867
; RV32I-NEXT: mv a1, s4
1877-
; RV32I-NEXT: mv a2, s1
1878-
; RV32I-NEXT: mv a3, s0
1868+
; RV32I-NEXT: mv a2, s3
1869+
; RV32I-NEXT: mv a3, s2
18791870
; RV32I-NEXT: call __muldi3
1880-
; RV32I-NEXT: add s2, a0, s2
1881-
; RV32I-NEXT: sltu a3, s9, s5
1882-
; RV32I-NEXT: sltu a4, s8, s7
1883-
; RV32I-NEXT: add a1, a1, s3
1884-
; RV32I-NEXT: add a2, s9, s2
1885-
; RV32I-NEXT: add a4, s6, a4
1886-
; RV32I-NEXT: sltu a0, s2, a0
1887-
; RV32I-NEXT: sltu a5, a2, s9
1888-
; RV32I-NEXT: add a3, a4, a3
1889-
; RV32I-NEXT: add a0, a1, a0
1890-
; RV32I-NEXT: add a0, a3, a0
1891-
; RV32I-NEXT: add a1, a0, a5
1892-
; RV32I-NEXT: mv a0, a2
1871+
; RV32I-NEXT: add a0, s7, a0
1872+
; RV32I-NEXT: sltu a2, s7, s0
1873+
; RV32I-NEXT: sltu a3, s5, s6
1874+
; RV32I-NEXT: sltu a4, a0, s7
1875+
; RV32I-NEXT: add a3, s1, a3
1876+
; RV32I-NEXT: add a2, a3, a2
1877+
; RV32I-NEXT: add a1, a2, a1
1878+
; RV32I-NEXT: add a1, a1, a4
18931879
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
18941880
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
18951881
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1899,8 +1885,6 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
18991885
; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
19001886
; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
19011887
; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
1902-
; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
1903-
; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
19041888
; RV32I-NEXT: addi sp, sp, 48
19051889
; RV32I-NEXT: ret
19061890
;

0 commit comments

Comments
 (0)