Skip to content

Commit 0b1318f

Browse files
authored
[DAG] Fold rem(rem(A, BCst), Op1Cst) -> rem(A, Op1Cst) (#159517)
Fixes [157370](#157370). UREM general proof: <https://alive2.llvm.org/ce/z/b_GQJX>. SREM general proof: <https://alive2.llvm.org/ce/z/Whkaxh>. The tests are added for rv32i and rv64i because those are the only targets on which I could verify that the fold works.
1 parent cca769a commit 0b1318f

File tree

3 files changed

+195
-0
lines changed

3 files changed

+195
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5442,6 +5442,24 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
54425442
if (SDValue DivRem = useDivRem(N))
54435443
return DivRem.getValue(1);
54445444

5445+
// fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5446+
// iff urem(BCst, Op1Cst) == 0
5447+
SDValue A;
5448+
APInt Op1Cst, BCst;
5449+
if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
5450+
m_ConstInt(Op1Cst))) &&
5451+
BCst.urem(Op1Cst).isZero()) {
5452+
return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5453+
}
5454+
5455+
// fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5456+
// iff srem(BCst, Op1Cst) == 0 && Op1Cst != -1
5457+
if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
5458+
m_ConstInt(Op1Cst))) &&
5459+
BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5460+
return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5461+
}
5462+
54455463
return SDValue();
54465464
}
54475465

llvm/test/CodeGen/RISCV/srem.ll

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3+
; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
4+
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
5+
; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
6+
7+
; Positive test: 25 is a multiple of 5, so the two chained srem operations are
; expected to collapse into a single signed remainder by 5 (one libcall below).
define i32 @fold_srem_constants(i32 %v0) nounwind {
8+
; RV32I-LABEL: fold_srem_constants:
9+
; RV32I: # %bb.0:
10+
; RV32I-NEXT: li a1, 5
11+
; RV32I-NEXT: tail __modsi3
12+
;
13+
; RV64I-LABEL: fold_srem_constants:
14+
; RV64I: # %bb.0:
15+
; RV64I-NEXT: addi sp, sp, -16
16+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
17+
; RV64I-NEXT: sext.w a0, a0
18+
; RV64I-NEXT: li a1, 5
19+
; RV64I-NEXT: call __moddi3
20+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
21+
; RV64I-NEXT: addi sp, sp, 16
22+
; RV64I-NEXT: ret
23+
%v1 = srem i32 %v0, 25
24+
%v2 = srem i32 %v1, 5
25+
ret i32 %v2
26+
}
27+
28+
; Negative test: 25 is not a multiple of 3, so both srem operations must be
; kept (two signed remainder libcalls in the expected output below).
define i32 @dont_fold_srem_constants(i32 %v0) nounwind {
29+
; RV32I-LABEL: dont_fold_srem_constants:
30+
; RV32I: # %bb.0:
31+
; RV32I-NEXT: addi sp, sp, -16
32+
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
33+
; RV32I-NEXT: li a1, 25
34+
; RV32I-NEXT: call __modsi3
35+
; RV32I-NEXT: li a1, 3
36+
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
37+
; RV32I-NEXT: addi sp, sp, 16
38+
; RV32I-NEXT: tail __modsi3
39+
;
40+
; RV64I-LABEL: dont_fold_srem_constants:
41+
; RV64I: # %bb.0:
42+
; RV64I-NEXT: addi sp, sp, -16
43+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
44+
; RV64I-NEXT: sext.w a0, a0
45+
; RV64I-NEXT: li a1, 25
46+
; RV64I-NEXT: call __moddi3
47+
; RV64I-NEXT: li a1, 3
48+
; RV64I-NEXT: call __moddi3
49+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
50+
; RV64I-NEXT: addi sp, sp, 16
51+
; RV64I-NEXT: ret
52+
%v1 = srem i32 %v0, 25
53+
%v2 = srem i32 %v1, 3
54+
ret i32 %v2
55+
}
56+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
57+
; CHECK: {{.*}}

llvm/test/CodeGen/RISCV/urem.ll

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3+
; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
4+
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
5+
; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
6+
7+
; Positive test: 25 is a multiple of 5, so the two chained urem operations are
; expected to collapse into a single unsigned remainder by 5 (one libcall below).
define i32 @fold_urem_constants(i32 %v0) nounwind {
8+
; RV32I-LABEL: fold_urem_constants:
9+
; RV32I: # %bb.0:
10+
; RV32I-NEXT: li a1, 5
11+
; RV32I-NEXT: tail __umodsi3
12+
;
13+
; RV64I-LABEL: fold_urem_constants:
14+
; RV64I: # %bb.0:
15+
; RV64I-NEXT: addi sp, sp, -16
16+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
17+
; RV64I-NEXT: slli a0, a0, 32
18+
; RV64I-NEXT: srli a0, a0, 32
19+
; RV64I-NEXT: li a1, 5
20+
; RV64I-NEXT: call __umoddi3
21+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
22+
; RV64I-NEXT: addi sp, sp, 16
23+
; RV64I-NEXT: ret
24+
%v1 = urem i32 %v0, 25
25+
%v2 = urem i32 %v1, 5
26+
ret i32 %v2
27+
}
28+
29+
; Negative test: 25 is not a multiple of 3, so both urem operations must be
; kept (two unsigned remainder libcalls in the expected output below).
define i32 @dont_fold_urem_constants(i32 %v0) nounwind {
30+
; RV32I-LABEL: dont_fold_urem_constants:
31+
; RV32I: # %bb.0:
32+
; RV32I-NEXT: addi sp, sp, -16
33+
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
34+
; RV32I-NEXT: li a1, 25
35+
; RV32I-NEXT: call __umodsi3
36+
; RV32I-NEXT: li a1, 3
37+
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
38+
; RV32I-NEXT: addi sp, sp, 16
39+
; RV32I-NEXT: tail __umodsi3
40+
;
41+
; RV64I-LABEL: dont_fold_urem_constants:
42+
; RV64I: # %bb.0:
43+
; RV64I-NEXT: addi sp, sp, -16
44+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
45+
; RV64I-NEXT: slli a0, a0, 32
46+
; RV64I-NEXT: srli a0, a0, 32
47+
; RV64I-NEXT: li a1, 25
48+
; RV64I-NEXT: call __umoddi3
49+
; RV64I-NEXT: li a1, 3
50+
; RV64I-NEXT: call __umoddi3
51+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
52+
; RV64I-NEXT: addi sp, sp, 16
53+
; RV64I-NEXT: ret
54+
%v1 = urem i32 %v0, 25
55+
%v2 = urem i32 %v1, 3
56+
ret i32 %v2
57+
}
58+
59+
; Negative test: the inner remainder is unsigned and the outer one is signed, so
; the same-kind rem-of-rem fold must not fire and two libcalls remain below.
; Note that the expected output uses the unsigned libcall for the outer srem too.
define i32 @dont_fold_urem_srem_mixed_constants(i32 %v0) nounwind {
60+
; RV32I-LABEL: dont_fold_urem_srem_mixed_constants:
61+
; RV32I: # %bb.0:
62+
; RV32I-NEXT: addi sp, sp, -16
63+
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
64+
; RV32I-NEXT: li a1, 25
65+
; RV32I-NEXT: call __umodsi3
66+
; RV32I-NEXT: li a1, 3
67+
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
68+
; RV32I-NEXT: addi sp, sp, 16
69+
; RV32I-NEXT: tail __umodsi3
70+
;
71+
; RV64I-LABEL: dont_fold_urem_srem_mixed_constants:
72+
; RV64I: # %bb.0:
73+
; RV64I-NEXT: addi sp, sp, -16
74+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
75+
; RV64I-NEXT: slli a0, a0, 32
76+
; RV64I-NEXT: srli a0, a0, 32
77+
; RV64I-NEXT: li a1, 25
78+
; RV64I-NEXT: call __umoddi3
79+
; RV64I-NEXT: li a1, 3
80+
; RV64I-NEXT: call __umoddi3
81+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
82+
; RV64I-NEXT: addi sp, sp, 16
83+
; RV64I-NEXT: ret
84+
%v1 = urem i32 %v0, 25
85+
%v2 = srem i32 %v1, 3
86+
ret i32 %v2
87+
}
88+
89+
; Negative test: the inner remainder is signed and the outer one is unsigned, so
; the same-kind rem-of-rem fold must not fire; the expected output keeps the
; signed libcall followed by the unsigned libcall.
define i32 @dont_fold_srem_urem_mixed_constants(i32 %v0) nounwind {
90+
; RV32I-LABEL: dont_fold_srem_urem_mixed_constants:
91+
; RV32I: # %bb.0:
92+
; RV32I-NEXT: addi sp, sp, -16
93+
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
94+
; RV32I-NEXT: li a1, 25
95+
; RV32I-NEXT: call __modsi3
96+
; RV32I-NEXT: li a1, 3
97+
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
98+
; RV32I-NEXT: addi sp, sp, 16
99+
; RV32I-NEXT: tail __umodsi3
100+
;
101+
; RV64I-LABEL: dont_fold_srem_urem_mixed_constants:
102+
; RV64I: # %bb.0:
103+
; RV64I-NEXT: addi sp, sp, -16
104+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
105+
; RV64I-NEXT: sext.w a0, a0
106+
; RV64I-NEXT: li a1, 25
107+
; RV64I-NEXT: call __moddi3
108+
; RV64I-NEXT: slli a0, a0, 32
109+
; RV64I-NEXT: srli a0, a0, 32
110+
; RV64I-NEXT: li a1, 3
111+
; RV64I-NEXT: call __umoddi3
112+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
113+
; RV64I-NEXT: addi sp, sp, 16
114+
; RV64I-NEXT: ret
115+
%v1 = srem i32 %v0, 25
116+
%v2 = urem i32 %v1, 3
117+
ret i32 %v2
118+
}
119+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
120+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)