Skip to content

Commit 7a73a8b

Browse files
authored
[RISCV] Allow large div peephole optimization for minsize (llvm#163679)
When the `minsize` function attribute is set, division of large integers by a power of 2 is not optimized the way the ExpandLargeDivRem pass expects, which results in a compiler crash. Co-authored-by: kvp <[email protected]>
1 parent 2505df0 commit 7a73a8b

File tree

2 files changed

+150
-1
lines changed

2 files changed

+150
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24830,7 +24830,8 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
2483024830
// instruction, as it is usually smaller than the alternative sequence.
2483124831
// TODO: Add vector division?
2483224832
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24833-
return OptSize && !VT.isVector();
24833+
return OptSize && !VT.isVector() &&
24834+
VT.getSizeInBits() <= getMaxDivRemBitWidthSupported();
2483424835
}
2483524836

2483624837
bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {

llvm/test/CodeGen/RISCV/div_minsize.ll

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,151 @@ define i32 @testsize4(i32 %x) minsize nounwind {
6868
%div = udiv i32 %x, 33
6969
ret i32 %div
7070
}
71+
72+
define i128 @i128_sdiv(i128 %arg0) minsize nounwind {
73+
; RV32IM-LABEL: i128_sdiv:
74+
; RV32IM: # %bb.0:
75+
; RV32IM-NEXT: lw a2, 12(a1)
76+
; RV32IM-NEXT: lw a3, 8(a1)
77+
; RV32IM-NEXT: lw a4, 0(a1)
78+
; RV32IM-NEXT: lw a1, 4(a1)
79+
; RV32IM-NEXT: srai a5, a2, 31
80+
; RV32IM-NEXT: srli a5, a5, 30
81+
; RV32IM-NEXT: add a5, a4, a5
82+
; RV32IM-NEXT: sltu a4, a5, a4
83+
; RV32IM-NEXT: srli a5, a5, 2
84+
; RV32IM-NEXT: add a6, a1, a4
85+
; RV32IM-NEXT: sltu a1, a6, a1
86+
; RV32IM-NEXT: and a1, a4, a1
87+
; RV32IM-NEXT: srli a4, a6, 2
88+
; RV32IM-NEXT: slli a6, a6, 30
89+
; RV32IM-NEXT: or a5, a5, a6
90+
; RV32IM-NEXT: add a1, a3, a1
91+
; RV32IM-NEXT: srli a6, a1, 2
92+
; RV32IM-NEXT: sltu a3, a1, a3
93+
; RV32IM-NEXT: slli a1, a1, 30
94+
; RV32IM-NEXT: add a2, a2, a3
95+
; RV32IM-NEXT: or a1, a4, a1
96+
; RV32IM-NEXT: slli a3, a2, 30
97+
; RV32IM-NEXT: srai a2, a2, 2
98+
; RV32IM-NEXT: or a3, a6, a3
99+
; RV32IM-NEXT: sw a5, 0(a0)
100+
; RV32IM-NEXT: sw a1, 4(a0)
101+
; RV32IM-NEXT: sw a3, 8(a0)
102+
; RV32IM-NEXT: sw a2, 12(a0)
103+
; RV32IM-NEXT: ret
104+
;
105+
; RV64IM-LABEL: i128_sdiv:
106+
; RV64IM: # %bb.0:
107+
; RV64IM-NEXT: addi sp, sp, -16
108+
; RV64IM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
109+
; RV64IM-NEXT: li a2, 4
110+
; RV64IM-NEXT: li a3, 0
111+
; RV64IM-NEXT: call __divti3
112+
; RV64IM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
113+
; RV64IM-NEXT: addi sp, sp, 16
114+
; RV64IM-NEXT: ret
115+
%div = sdiv i128 %arg0, 4
116+
ret i128 %div
117+
}
118+
119+
define i256 @i256_sdiv(i256 %arg0) minsize nounwind {
120+
; RV32IM-LABEL: i256_sdiv:
121+
; RV32IM: # %bb.0:
122+
; RV32IM-NEXT: lw a5, 16(a1)
123+
; RV32IM-NEXT: lw a4, 20(a1)
124+
; RV32IM-NEXT: lw a2, 24(a1)
125+
; RV32IM-NEXT: lw a3, 28(a1)
126+
; RV32IM-NEXT: lw a6, 0(a1)
127+
; RV32IM-NEXT: lw a7, 4(a1)
128+
; RV32IM-NEXT: lw t0, 8(a1)
129+
; RV32IM-NEXT: lw t1, 12(a1)
130+
; RV32IM-NEXT: srai a1, a3, 31
131+
; RV32IM-NEXT: srli a1, a1, 30
132+
; RV32IM-NEXT: add a1, a6, a1
133+
; RV32IM-NEXT: sltu t2, a1, a6
134+
; RV32IM-NEXT: add a6, a7, t2
135+
; RV32IM-NEXT: sltu a7, a6, a7
136+
; RV32IM-NEXT: and t2, t2, a7
137+
; RV32IM-NEXT: add a7, t0, t2
138+
; RV32IM-NEXT: sltu t3, a7, t0
139+
; RV32IM-NEXT: add t0, t1, t3
140+
; RV32IM-NEXT: beqz t2, .LBB5_2
141+
; RV32IM-NEXT: # %bb.1:
142+
; RV32IM-NEXT: sltu t1, t0, t1
143+
; RV32IM-NEXT: and t2, t3, t1
144+
; RV32IM-NEXT: .LBB5_2:
145+
; RV32IM-NEXT: add t2, a5, t2
146+
; RV32IM-NEXT: srli t1, t0, 2
147+
; RV32IM-NEXT: srli t3, a7, 2
148+
; RV32IM-NEXT: slli t0, t0, 30
149+
; RV32IM-NEXT: slli a7, a7, 30
150+
; RV32IM-NEXT: or t0, t3, t0
151+
; RV32IM-NEXT: srli t3, a6, 2
152+
; RV32IM-NEXT: srli a1, a1, 2
153+
; RV32IM-NEXT: slli a6, a6, 30
154+
; RV32IM-NEXT: sltu a5, t2, a5
155+
; RV32IM-NEXT: or a7, t3, a7
156+
; RV32IM-NEXT: srli t3, t2, 2
157+
; RV32IM-NEXT: slli t2, t2, 30
158+
; RV32IM-NEXT: or a1, a1, a6
159+
; RV32IM-NEXT: add a6, a4, a5
160+
; RV32IM-NEXT: or t1, t1, t2
161+
; RV32IM-NEXT: sltu a4, a6, a4
162+
; RV32IM-NEXT: srli t2, a6, 2
163+
; RV32IM-NEXT: slli a6, a6, 30
164+
; RV32IM-NEXT: sw a1, 0(a0)
165+
; RV32IM-NEXT: sw a7, 4(a0)
166+
; RV32IM-NEXT: sw t0, 8(a0)
167+
; RV32IM-NEXT: sw t1, 12(a0)
168+
; RV32IM-NEXT: and a4, a5, a4
169+
; RV32IM-NEXT: or a1, t3, a6
170+
; RV32IM-NEXT: add a4, a2, a4
171+
; RV32IM-NEXT: srli a5, a4, 2
172+
; RV32IM-NEXT: sltu a2, a4, a2
173+
; RV32IM-NEXT: slli a4, a4, 30
174+
; RV32IM-NEXT: add a2, a3, a2
175+
; RV32IM-NEXT: or a3, t2, a4
176+
; RV32IM-NEXT: slli a4, a2, 30
177+
; RV32IM-NEXT: srai a2, a2, 2
178+
; RV32IM-NEXT: or a4, a5, a4
179+
; RV32IM-NEXT: sw a1, 16(a0)
180+
; RV32IM-NEXT: sw a3, 20(a0)
181+
; RV32IM-NEXT: sw a4, 24(a0)
182+
; RV32IM-NEXT: sw a2, 28(a0)
183+
; RV32IM-NEXT: ret
184+
;
185+
; RV64IM-LABEL: i256_sdiv:
186+
; RV64IM: # %bb.0:
187+
; RV64IM-NEXT: ld a2, 24(a1)
188+
; RV64IM-NEXT: ld a3, 16(a1)
189+
; RV64IM-NEXT: ld a4, 0(a1)
190+
; RV64IM-NEXT: ld a1, 8(a1)
191+
; RV64IM-NEXT: srai a5, a2, 63
192+
; RV64IM-NEXT: srli a5, a5, 62
193+
; RV64IM-NEXT: add a5, a4, a5
194+
; RV64IM-NEXT: sltu a4, a5, a4
195+
; RV64IM-NEXT: srli a5, a5, 2
196+
; RV64IM-NEXT: add a6, a1, a4
197+
; RV64IM-NEXT: sltu a1, a6, a1
198+
; RV64IM-NEXT: and a1, a4, a1
199+
; RV64IM-NEXT: srli a4, a6, 2
200+
; RV64IM-NEXT: slli a6, a6, 62
201+
; RV64IM-NEXT: or a5, a5, a6
202+
; RV64IM-NEXT: add a1, a3, a1
203+
; RV64IM-NEXT: srli a6, a1, 2
204+
; RV64IM-NEXT: sltu a3, a1, a3
205+
; RV64IM-NEXT: slli a1, a1, 62
206+
; RV64IM-NEXT: add a2, a2, a3
207+
; RV64IM-NEXT: or a1, a4, a1
208+
; RV64IM-NEXT: slli a3, a2, 62
209+
; RV64IM-NEXT: srai a2, a2, 2
210+
; RV64IM-NEXT: or a3, a6, a3
211+
; RV64IM-NEXT: sd a5, 0(a0)
212+
; RV64IM-NEXT: sd a1, 8(a0)
213+
; RV64IM-NEXT: sd a3, 16(a0)
214+
; RV64IM-NEXT: sd a2, 24(a0)
215+
; RV64IM-NEXT: ret
216+
%div = sdiv i256 %arg0, 4
217+
ret i256 %div
218+
}

0 commit comments

Comments
 (0)