@@ -25,15 +25,16 @@ define i32 @andn_i32(i32 %a, i32 %b) nounwind {
2525define i32 @andn_i32_from_sub (i32 %a , i32 %b ) nounwind {
2626; RV32I-LABEL: andn_i32_from_sub:
2727; RV32I: # %bb.0:
28- ; RV32I-NEXT: neg a1, a1
29- ; RV32I-NEXT: and a0, a1, a0
28+ ; RV32I-NEXT: li a2, -1
29+ ; RV32I-NEXT: sub a2, a2, a1
30+ ; RV32I-NEXT: and a0, a2, a0
3031; RV32I-NEXT: ret
3132;
3233; RV32ZBB-ZBKB-LABEL: andn_i32_from_sub:
3334; RV32ZBB-ZBKB: # %bb.0:
3435; RV32ZBB-ZBKB-NEXT: andn a0, a0, a1
3536; RV32ZBB-ZBKB-NEXT: ret
36- %neg = sub i32 0 , %b
37+ %neg = sub i32 -1 , %b
3738 %and = and i32 %neg , %a
3839 ret i32 %and
3940}
@@ -60,23 +61,25 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
6061define i64 @andn_i64_from_sub (i64 %a , i64 %b ) nounwind {
6162; RV32I-LABEL: andn_i64_from_sub:
6263; RV32I: # %bb.0:
63- ; RV32I-NEXT: neg a4, a2
64- ; RV32I-NEXT: snez a2, a2
65- ; RV32I-NEXT: neg a3, a3
66- ; RV32I-NEXT: sub a3, a3, a2
67- ; RV32I-NEXT: and a0, a4, a0
68- ; RV32I-NEXT: and a1, a3, a1
64+ ; RV32I-NEXT: li a4, -1
65+ ; RV32I-NEXT: sub a5, a4, a2
66+ ; RV32I-NEXT: sltu a2, a4, a2
67+ ; RV32I-NEXT: sub a4, a4, a3
68+ ; RV32I-NEXT: sub a4, a4, a2
69+ ; RV32I-NEXT: and a0, a5, a0
70+ ; RV32I-NEXT: and a1, a4, a1
6971; RV32I-NEXT: ret
7072;
7173; RV32ZBB-ZBKB-LABEL: andn_i64_from_sub:
7274; RV32ZBB-ZBKB: # %bb.0:
73- ; RV32ZBB-ZBKB-NEXT: snez a4, a2
74- ; RV32ZBB-ZBKB-NEXT: neg a3, a3
75- ; RV32ZBB-ZBKB-NEXT: sub a3, a3, a4
75+ ; RV32ZBB-ZBKB-NEXT: li a4, -1
76+ ; RV32ZBB-ZBKB-NEXT: sltu a5, a4, a2
77+ ; RV32ZBB-ZBKB-NEXT: sub a4, a4, a3
78+ ; RV32ZBB-ZBKB-NEXT: sub a4, a4, a5
7679; RV32ZBB-ZBKB-NEXT: andn a0, a0, a2
77- ; RV32ZBB-ZBKB-NEXT: and a1, a3, a1
80+ ; RV32ZBB-ZBKB-NEXT: and a1, a4, a1
7881; RV32ZBB-ZBKB-NEXT: ret
79- %neg = sub i64 0 , %b
82+ %neg = sub i64 -1 , %b
8083 %and = and i64 %neg , %a
8184 ret i64 %and
8285}
@@ -176,102 +179,53 @@ define i32 @rol_i32(i32 %a, i32 %b) nounwind {
176179declare i64 @llvm.fshl.i64 (i64 , i64 , i64 )
177180
178181define i64 @rol_i64 (i64 %a , i64 %b ) nounwind {
179- ; RV32I-LABEL: rol_i64:
180- ; RV32I: # %bb.0:
181- ; RV32I-NEXT: andi a6, a2, 63
182- ; RV32I-NEXT: li a4, 32
183- ; RV32I-NEXT: bltu a6, a4, .LBB9_2
184- ; RV32I-NEXT: # %bb.1:
185- ; RV32I-NEXT: li a3, 0
186- ; RV32I-NEXT: sll a7, a0, a6
187- ; RV32I-NEXT: j .LBB9_3
188- ; RV32I-NEXT: .LBB9_2:
189- ; RV32I-NEXT: sll a3, a0, a2
190- ; RV32I-NEXT: neg a5, a6
191- ; RV32I-NEXT: srl a5, a0, a5
192- ; RV32I-NEXT: sll a7, a1, a2
193- ; RV32I-NEXT: or a7, a5, a7
194- ; RV32I-NEXT: .LBB9_3:
195- ; RV32I-NEXT: neg a5, a2
196- ; RV32I-NEXT: mv a2, a1
197- ; RV32I-NEXT: beqz a6, .LBB9_5
198- ; RV32I-NEXT: # %bb.4:
199- ; RV32I-NEXT: mv a2, a7
200- ; RV32I-NEXT: .LBB9_5:
201- ; RV32I-NEXT: andi a6, a5, 63
202- ; RV32I-NEXT: bltu a6, a4, .LBB9_7
203- ; RV32I-NEXT: # %bb.6:
204- ; RV32I-NEXT: srl a7, a1, a6
205- ; RV32I-NEXT: bnez a6, .LBB9_8
206- ; RV32I-NEXT: j .LBB9_9
207- ; RV32I-NEXT: .LBB9_7:
208- ; RV32I-NEXT: srl a7, a0, a5
209- ; RV32I-NEXT: neg t0, a6
210- ; RV32I-NEXT: sll t0, a1, t0
211- ; RV32I-NEXT: or a7, a7, t0
212- ; RV32I-NEXT: beqz a6, .LBB9_9
213- ; RV32I-NEXT: .LBB9_8:
214- ; RV32I-NEXT: mv a0, a7
215- ; RV32I-NEXT: .LBB9_9:
216- ; RV32I-NEXT: bltu a6, a4, .LBB9_11
217- ; RV32I-NEXT: # %bb.10:
218- ; RV32I-NEXT: li a1, 0
219- ; RV32I-NEXT: j .LBB9_12
220- ; RV32I-NEXT: .LBB9_11:
221- ; RV32I-NEXT: srl a1, a1, a5
222- ; RV32I-NEXT: .LBB9_12:
223- ; RV32I-NEXT: or a0, a3, a0
224- ; RV32I-NEXT: or a1, a2, a1
225- ; RV32I-NEXT: ret
226- ;
227- ; RV32ZBB-ZBKB-LABEL: rol_i64:
228- ; RV32ZBB-ZBKB: # %bb.0:
229- ; RV32ZBB-ZBKB-NEXT: andi a6, a2, 63
230- ; RV32ZBB-ZBKB-NEXT: li a4, 32
231- ; RV32ZBB-ZBKB-NEXT: bltu a6, a4, .LBB9_2
232- ; RV32ZBB-ZBKB-NEXT: # %bb.1:
233- ; RV32ZBB-ZBKB-NEXT: li a3, 0
234- ; RV32ZBB-ZBKB-NEXT: sll a7, a0, a6
235- ; RV32ZBB-ZBKB-NEXT: j .LBB9_3
236- ; RV32ZBB-ZBKB-NEXT: .LBB9_2:
237- ; RV32ZBB-ZBKB-NEXT: sll a3, a0, a2
238- ; RV32ZBB-ZBKB-NEXT: neg a5, a6
239- ; RV32ZBB-ZBKB-NEXT: srl a5, a0, a5
240- ; RV32ZBB-ZBKB-NEXT: sll a7, a1, a2
241- ; RV32ZBB-ZBKB-NEXT: or a7, a5, a7
242- ; RV32ZBB-ZBKB-NEXT: .LBB9_3:
243- ; RV32ZBB-ZBKB-NEXT: li t0, 63
244- ; RV32ZBB-ZBKB-NEXT: mv a5, a1
245- ; RV32ZBB-ZBKB-NEXT: beqz a6, .LBB9_5
246- ; RV32ZBB-ZBKB-NEXT: # %bb.4:
247- ; RV32ZBB-ZBKB-NEXT: mv a5, a7
248- ; RV32ZBB-ZBKB-NEXT: .LBB9_5:
249- ; RV32ZBB-ZBKB-NEXT: andn a6, t0, a2
250- ; RV32ZBB-ZBKB-NEXT: neg a2, a2
251- ; RV32ZBB-ZBKB-NEXT: bltu a6, a4, .LBB9_7
252- ; RV32ZBB-ZBKB-NEXT: # %bb.6:
253- ; RV32ZBB-ZBKB-NEXT: srl a7, a1, a6
254- ; RV32ZBB-ZBKB-NEXT: bnez a6, .LBB9_8
255- ; RV32ZBB-ZBKB-NEXT: j .LBB9_9
256- ; RV32ZBB-ZBKB-NEXT: .LBB9_7:
257- ; RV32ZBB-ZBKB-NEXT: srl a7, a0, a2
258- ; RV32ZBB-ZBKB-NEXT: neg t0, a6
259- ; RV32ZBB-ZBKB-NEXT: sll t0, a1, t0
260- ; RV32ZBB-ZBKB-NEXT: or a7, a7, t0
261- ; RV32ZBB-ZBKB-NEXT: beqz a6, .LBB9_9
262- ; RV32ZBB-ZBKB-NEXT: .LBB9_8:
263- ; RV32ZBB-ZBKB-NEXT: mv a0, a7
264- ; RV32ZBB-ZBKB-NEXT: .LBB9_9:
265- ; RV32ZBB-ZBKB-NEXT: bltu a6, a4, .LBB9_11
266- ; RV32ZBB-ZBKB-NEXT: # %bb.10:
267- ; RV32ZBB-ZBKB-NEXT: li a1, 0
268- ; RV32ZBB-ZBKB-NEXT: j .LBB9_12
269- ; RV32ZBB-ZBKB-NEXT: .LBB9_11:
270- ; RV32ZBB-ZBKB-NEXT: srl a1, a1, a2
271- ; RV32ZBB-ZBKB-NEXT: .LBB9_12:
272- ; RV32ZBB-ZBKB-NEXT: or a0, a3, a0
273- ; RV32ZBB-ZBKB-NEXT: or a1, a5, a1
274- ; RV32ZBB-ZBKB-NEXT: ret
182+ ; CHECK-LABEL: rol_i64:
183+ ; CHECK: # %bb.0:
184+ ; CHECK-NEXT: andi a6, a2, 63
185+ ; CHECK-NEXT: li a4, 32
186+ ; CHECK-NEXT: bltu a6, a4, .LBB9_2
187+ ; CHECK-NEXT: # %bb.1:
188+ ; CHECK-NEXT: li a3, 0
189+ ; CHECK-NEXT: sll a7, a0, a6
190+ ; CHECK-NEXT: j .LBB9_3
191+ ; CHECK-NEXT: .LBB9_2:
192+ ; CHECK-NEXT: sll a3, a0, a2
193+ ; CHECK-NEXT: neg a5, a6
194+ ; CHECK-NEXT: srl a5, a0, a5
195+ ; CHECK-NEXT: sll a7, a1, a2
196+ ; CHECK-NEXT: or a7, a5, a7
197+ ; CHECK-NEXT: .LBB9_3:
198+ ; CHECK-NEXT: neg a5, a2
199+ ; CHECK-NEXT: mv a2, a1
200+ ; CHECK-NEXT: beqz a6, .LBB9_5
201+ ; CHECK-NEXT: # %bb.4:
202+ ; CHECK-NEXT: mv a2, a7
203+ ; CHECK-NEXT: .LBB9_5:
204+ ; CHECK-NEXT: andi a6, a5, 63
205+ ; CHECK-NEXT: bltu a6, a4, .LBB9_7
206+ ; CHECK-NEXT: # %bb.6:
207+ ; CHECK-NEXT: srl a7, a1, a6
208+ ; CHECK-NEXT: bnez a6, .LBB9_8
209+ ; CHECK-NEXT: j .LBB9_9
210+ ; CHECK-NEXT: .LBB9_7:
211+ ; CHECK-NEXT: srl a7, a0, a5
212+ ; CHECK-NEXT: neg t0, a6
213+ ; CHECK-NEXT: sll t0, a1, t0
214+ ; CHECK-NEXT: or a7, a7, t0
215+ ; CHECK-NEXT: beqz a6, .LBB9_9
216+ ; CHECK-NEXT: .LBB9_8:
217+ ; CHECK-NEXT: mv a0, a7
218+ ; CHECK-NEXT: .LBB9_9:
219+ ; CHECK-NEXT: bltu a6, a4, .LBB9_11
220+ ; CHECK-NEXT: # %bb.10:
221+ ; CHECK-NEXT: li a1, 0
222+ ; CHECK-NEXT: j .LBB9_12
223+ ; CHECK-NEXT: .LBB9_11:
224+ ; CHECK-NEXT: srl a1, a1, a5
225+ ; CHECK-NEXT: .LBB9_12:
226+ ; CHECK-NEXT: or a0, a3, a0
227+ ; CHECK-NEXT: or a1, a2, a1
228+ ; CHECK-NEXT: ret
275229 %or = tail call i64 @llvm.fshl.i64 (i64 %a , i64 %a , i64 %b )
276230 ret i64 %or
277231}
@@ -301,104 +255,54 @@ define i32 @ror_i32(i32 %a, i32 %b) nounwind {
301255declare i64 @llvm.fshr.i64 (i64 , i64 , i64 )
302256
303257define i64 @ror_i64 (i64 %a , i64 %b ) nounwind {
304- ; RV32I-LABEL: ror_i64:
305- ; RV32I: # %bb.0:
306- ; RV32I-NEXT: andi a5, a2, 63
307- ; RV32I-NEXT: li a4, 32
308- ; RV32I-NEXT: bltu a5, a4, .LBB11_2
309- ; RV32I-NEXT: # %bb.1:
310- ; RV32I-NEXT: srl a6, a1, a5
311- ; RV32I-NEXT: mv a3, a0
312- ; RV32I-NEXT: bnez a5, .LBB11_3
313- ; RV32I-NEXT: j .LBB11_4
314- ; RV32I-NEXT: .LBB11_2:
315- ; RV32I-NEXT: srl a3, a0, a2
316- ; RV32I-NEXT: neg a6, a5
317- ; RV32I-NEXT: sll a6, a1, a6
318- ; RV32I-NEXT: or a6, a3, a6
319- ; RV32I-NEXT: mv a3, a0
320- ; RV32I-NEXT: beqz a5, .LBB11_4
321- ; RV32I-NEXT: .LBB11_3:
322- ; RV32I-NEXT: mv a3, a6
323- ; RV32I-NEXT: .LBB11_4:
324- ; RV32I-NEXT: neg a6, a2
325- ; RV32I-NEXT: bltu a5, a4, .LBB11_7
326- ; RV32I-NEXT: # %bb.5:
327- ; RV32I-NEXT: li a2, 0
328- ; RV32I-NEXT: andi a5, a6, 63
329- ; RV32I-NEXT: bgeu a5, a4, .LBB11_8
330- ; RV32I-NEXT: .LBB11_6:
331- ; RV32I-NEXT: sll a4, a0, a6
332- ; RV32I-NEXT: neg a7, a5
333- ; RV32I-NEXT: srl a0, a0, a7
334- ; RV32I-NEXT: sll a6, a1, a6
335- ; RV32I-NEXT: or a0, a0, a6
336- ; RV32I-NEXT: bnez a5, .LBB11_9
337- ; RV32I-NEXT: j .LBB11_10
338- ; RV32I-NEXT: .LBB11_7:
339- ; RV32I-NEXT: srl a2, a1, a2
340- ; RV32I-NEXT: andi a5, a6, 63
341- ; RV32I-NEXT: bltu a5, a4, .LBB11_6
342- ; RV32I-NEXT: .LBB11_8:
343- ; RV32I-NEXT: li a4, 0
344- ; RV32I-NEXT: sll a0, a0, a5
345- ; RV32I-NEXT: beqz a5, .LBB11_10
346- ; RV32I-NEXT: .LBB11_9:
347- ; RV32I-NEXT: mv a1, a0
348- ; RV32I-NEXT: .LBB11_10:
349- ; RV32I-NEXT: or a0, a3, a4
350- ; RV32I-NEXT: or a1, a2, a1
351- ; RV32I-NEXT: ret
352- ;
353- ; RV32ZBB-ZBKB-LABEL: ror_i64:
354- ; RV32ZBB-ZBKB: # %bb.0:
355- ; RV32ZBB-ZBKB-NEXT: andi a4, a2, 63
356- ; RV32ZBB-ZBKB-NEXT: li a5, 32
357- ; RV32ZBB-ZBKB-NEXT: bltu a4, a5, .LBB11_2
358- ; RV32ZBB-ZBKB-NEXT: # %bb.1:
359- ; RV32ZBB-ZBKB-NEXT: srl a6, a1, a4
360- ; RV32ZBB-ZBKB-NEXT: mv a3, a0
361- ; RV32ZBB-ZBKB-NEXT: bnez a4, .LBB11_3
362- ; RV32ZBB-ZBKB-NEXT: j .LBB11_4
363- ; RV32ZBB-ZBKB-NEXT: .LBB11_2:
364- ; RV32ZBB-ZBKB-NEXT: srl a3, a0, a2
365- ; RV32ZBB-ZBKB-NEXT: neg a6, a4
366- ; RV32ZBB-ZBKB-NEXT: sll a6, a1, a6
367- ; RV32ZBB-ZBKB-NEXT: or a6, a3, a6
368- ; RV32ZBB-ZBKB-NEXT: mv a3, a0
369- ; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB11_4
370- ; RV32ZBB-ZBKB-NEXT: .LBB11_3:
371- ; RV32ZBB-ZBKB-NEXT: mv a3, a6
372- ; RV32ZBB-ZBKB-NEXT: .LBB11_4:
373- ; RV32ZBB-ZBKB-NEXT: li a6, 63
374- ; RV32ZBB-ZBKB-NEXT: bltu a4, a5, .LBB11_7
375- ; RV32ZBB-ZBKB-NEXT: # %bb.5:
376- ; RV32ZBB-ZBKB-NEXT: li a4, 0
377- ; RV32ZBB-ZBKB-NEXT: andn a6, a6, a2
378- ; RV32ZBB-ZBKB-NEXT: bgeu a6, a5, .LBB11_8
379- ; RV32ZBB-ZBKB-NEXT: .LBB11_6:
380- ; RV32ZBB-ZBKB-NEXT: neg a5, a2
381- ; RV32ZBB-ZBKB-NEXT: neg a7, a6
382- ; RV32ZBB-ZBKB-NEXT: sll a2, a0, a5
383- ; RV32ZBB-ZBKB-NEXT: srl a0, a0, a7
384- ; RV32ZBB-ZBKB-NEXT: sll a5, a1, a5
385- ; RV32ZBB-ZBKB-NEXT: or a0, a0, a5
386- ; RV32ZBB-ZBKB-NEXT: bnez a6, .LBB11_9
387- ; RV32ZBB-ZBKB-NEXT: j .LBB11_10
388- ; RV32ZBB-ZBKB-NEXT: .LBB11_7:
389- ; RV32ZBB-ZBKB-NEXT: srl a4, a1, a2
390- ; RV32ZBB-ZBKB-NEXT: andn a6, a6, a2
391- ; RV32ZBB-ZBKB-NEXT: bltu a6, a5, .LBB11_6
392- ; RV32ZBB-ZBKB-NEXT: .LBB11_8:
393- ; RV32ZBB-ZBKB-NEXT: li a2, 0
394- ; RV32ZBB-ZBKB-NEXT: sll a0, a0, a6
395- ; RV32ZBB-ZBKB-NEXT: beqz a6, .LBB11_10
396- ; RV32ZBB-ZBKB-NEXT: .LBB11_9:
397- ; RV32ZBB-ZBKB-NEXT: mv a1, a0
398- ; RV32ZBB-ZBKB-NEXT: .LBB11_10:
399- ; RV32ZBB-ZBKB-NEXT: or a0, a3, a2
400- ; RV32ZBB-ZBKB-NEXT: or a1, a4, a1
401- ; RV32ZBB-ZBKB-NEXT: ret
258+ ; CHECK-LABEL: ror_i64:
259+ ; CHECK: # %bb.0:
260+ ; CHECK-NEXT: andi a5, a2, 63
261+ ; CHECK-NEXT: li a4, 32
262+ ; CHECK-NEXT: bltu a5, a4, .LBB11_2
263+ ; CHECK-NEXT: # %bb.1:
264+ ; CHECK-NEXT: srl a6, a1, a5
265+ ; CHECK-NEXT: mv a3, a0
266+ ; CHECK-NEXT: bnez a5, .LBB11_3
267+ ; CHECK-NEXT: j .LBB11_4
268+ ; CHECK-NEXT: .LBB11_2:
269+ ; CHECK-NEXT: srl a3, a0, a2
270+ ; CHECK-NEXT: neg a6, a5
271+ ; CHECK-NEXT: sll a6, a1, a6
272+ ; CHECK-NEXT: or a6, a3, a6
273+ ; CHECK-NEXT: mv a3, a0
274+ ; CHECK-NEXT: beqz a5, .LBB11_4
275+ ; CHECK-NEXT: .LBB11_3:
276+ ; CHECK-NEXT: mv a3, a6
277+ ; CHECK-NEXT: .LBB11_4:
278+ ; CHECK-NEXT: neg a6, a2
279+ ; CHECK-NEXT: bltu a5, a4, .LBB11_7
280+ ; CHECK-NEXT: # %bb.5:
281+ ; CHECK-NEXT: li a2, 0
282+ ; CHECK-NEXT: andi a5, a6, 63
283+ ; CHECK-NEXT: bgeu a5, a4, .LBB11_8
284+ ; CHECK-NEXT: .LBB11_6:
285+ ; CHECK-NEXT: sll a4, a0, a6
286+ ; CHECK-NEXT: neg a7, a5
287+ ; CHECK-NEXT: srl a0, a0, a7
288+ ; CHECK-NEXT: sll a6, a1, a6
289+ ; CHECK-NEXT: or a0, a0, a6
290+ ; CHECK-NEXT: bnez a5, .LBB11_9
291+ ; CHECK-NEXT: j .LBB11_10
292+ ; CHECK-NEXT: .LBB11_7:
293+ ; CHECK-NEXT: srl a2, a1, a2
294+ ; CHECK-NEXT: andi a5, a6, 63
295+ ; CHECK-NEXT: bltu a5, a4, .LBB11_6
296+ ; CHECK-NEXT: .LBB11_8:
297+ ; CHECK-NEXT: li a4, 0
298+ ; CHECK-NEXT: sll a0, a0, a5
299+ ; CHECK-NEXT: beqz a5, .LBB11_10
300+ ; CHECK-NEXT: .LBB11_9:
301+ ; CHECK-NEXT: mv a1, a0
302+ ; CHECK-NEXT: .LBB11_10:
303+ ; CHECK-NEXT: or a0, a3, a4
304+ ; CHECK-NEXT: or a1, a2, a1
305+ ; CHECK-NEXT: ret
402306 %or = tail call i64 @llvm.fshr.i64 (i64 %a , i64 %a , i64 %b )
403307 ret i64 %or
404308}
0 commit comments