Skip to content

Commit c934058

Browse files
authored
riscv: implement ISLE lowering for uadd_overflow.i64 (#11540) (#11583)
* riscv: implement ISLE lowering for uadd_overflow.i64 (#11540) * fix to correct ISLE lowering for the 32-bit case * riscv: space fix for better formatting (#11540) * riscv: added riscv64 to the target list (#11540) * riscv: separated narrow tests (#11540) * riscv: fix for i128 (#11540) * riscv: fix for i128 (#11540) * riscv: fix for i128 (#11540)
1 parent 3fe9c3c commit c934058

File tree

3 files changed

+64
-24
lines changed

3 files changed

+64
-24
lines changed

cranelift/codegen/src/isa/riscv64/lower.isle

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,37 @@
320320
(_ InstOutput (gen_trapif (IntCC.UnsignedLessThan) tmp x tc)))
321321
tmp))
322322

;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; For i64: unsigned addition overflowed iff the wrapped sum is less than
;; either operand, so `sum <u x` is exactly the carry out of bit 63.
(rule 0 (lower (has_type $I64 (uadd_overflow x y)))
      (let ((sum XReg (rv_add x y))
            (overflow XReg (rv_sltu sum x)))
        (output_pair sum overflow)))

;; For i32 on RV64: `addw` produces the 32-bit sum sign-extended to 64 bits.
;; Sign-extend `x` as well before the unsigned comparison: sign-extension is
;; strictly monotonic with respect to unsigned 32-bit order, so
;; `sltu (sext sum32) (sext x32)` equals `sum32 <u x32`, which is the carry
;; out of bit 31. Comparing against a zero-extended `x` instead would be
;; wrong whenever both the sum and `x` have bit 31 set (e.g. x=0xFFFF_FFFF,
;; y=0x9000_0000 overflows, but sext(sum) >u zext(x) would report 0).
(rule 1 (lower (has_type $I32 (uadd_overflow x y)))
      (let ((x64 XReg (sext x))
            (sum XReg (rv_addw x y))
            (overflow XReg (rv_sltu sum x64)))
        (output_pair sum overflow)))

;; For i128, add the low and high halves separately and propagate the carry
;; out of the low half into the high half. The overall overflow is the carry
;; out of the high half:
;;   - `sum_hi_with_carry <u x_hi` catches the high addition wrapping, and
;;   - `carry & (sum_hi_with_carry == x_hi)` catches the one case the strict
;;     comparison misses: the incoming carry bumped the (wrapped) high sum
;;     up to exactly `x_hi` (including the full wrap-around to 0 == x_hi).
(rule 2 (lower (has_type $I128 (uadd_overflow x y)))
      (let ((x_regs ValueRegs x)
            (y_regs ValueRegs y)
            (x_lo XReg (value_regs_get x_regs 0))
            (x_hi XReg (value_regs_get x_regs 1))
            (y_lo XReg (value_regs_get y_regs 0))
            (y_hi XReg (value_regs_get y_regs 1))
            (sum_lo XReg (rv_add x_lo y_lo))
            (carry XReg (rv_sltu sum_lo x_lo))
            (sum_hi XReg (rv_add x_hi y_hi))
            (sum_hi_with_carry XReg (rv_add sum_hi carry))
            (overflow XReg (rv_or (rv_sltu sum_hi_with_carry x_hi)
                                  (rv_and carry (rv_seqz (rv_xor sum_hi_with_carry x_hi))))))
        (output_pair (value_regs sum_lo sum_hi_with_carry) overflow)))
323354
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
324355
;; Base case, simply subtracting things in registers.
325356

@@ -3099,4 +3130,4 @@
30993130
(y_pos VReg (rv_vmax_vx y (zero_reg) (unmasked) in_ty))
31003131
(x_clip VReg (rv_vnclipu_wi x_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))
31013132
(y_clip VReg (rv_vnclipu_wi y_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))))
3102-
(rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty)))
3133+
(rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty)))

cranelift/filetests/filetests/runtests/uadd_overflow.clif

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true
44
target aarch64
55
set enable_multi_ret_implicit_sret
66
target x86_64
7+
target riscv64
78

89
function %uaddof_i128(i128, i128) -> i128, i8 {
910
block0(v0: i128,v1: i128):
@@ -41,30 +42,7 @@ block0(v0: i64,v1: i64):
4142
; run: %uaddof_i64(0xA00A00A0_0A00A00A, 0x0BB0BB0B_B0BB0BB0) == [0xABBABBAB_BABBABBA, 0]
4243
; run: %uaddof_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0]
4344

44-
function %uaddof_i8(i8, i8) -> i8, i8 {
45-
block0(v0: i8, v1: i8):
46-
v2, v3 = uadd_overflow v0, v1
47-
return v2, v3
48-
}
49-
; run: %uaddof_i8(0, 1) == [1, 0]
50-
; run: %uaddof_i8(100, 27) == [127, 0]
51-
; run: %uaddof_i8(100, -20) == [80, 1]
52-
; run: %uaddof_i8(100, 28) == [-128, 0]
53-
; run: %uaddof_i8(-128, -128) == [0, 1]
54-
; run: %uaddof_i8(127, 1) == [0x80, 0]
5545

56-
function %uaddof_i16(i16, i16) -> i16, i8 {
57-
block0(v0: i16, v1: i16):
58-
v2, v3 = uadd_overflow v0, v1
59-
return v2, v3
60-
}
61-
; run: %uaddof_i16(0, 1) == [1, 0]
62-
; run: %uaddof_i16(100, 27) == [127, 0]
63-
; run: %uaddof_i16(100, 28) == [128, 0]
64-
; run: %uaddof_i16(32000, 767) == [32767, 0]
65-
; run: %uaddof_i16(32000, 768) == [-32768, 0]
66-
; run: %uaddof_i16(65000, 535) == [65535, 0]
67-
; run: %uaddof_i16(65000, 536) == [0, 1]
6846

6947
function %uaddof_i32(i32, i32) -> i32, i8 {
7048
block0(v0: i32, v1: i32):
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
set enable_multi_ret_implicit_sret
target x86_64

function %uaddof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
    v2, v3 = uadd_overflow v0, v1
    return v2, v3
}
; run: %uaddof_i8(0, 1) == [1, 0]
; run: %uaddof_i8(100, 27) == [127, 0]
; run: %uaddof_i8(100, -20) == [80, 1]
; run: %uaddof_i8(100, 28) == [-128, 0]
; run: %uaddof_i8(-128, -128) == [0, 1]
; run: %uaddof_i8(127, 1) == [0x80, 0]

function %uaddof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
    v2, v3 = uadd_overflow v0, v1
    return v2, v3
}
; run: %uaddof_i16(0, 1) == [1, 0]
; run: %uaddof_i16(100, 27) == [127, 0]
; run: %uaddof_i16(100, 28) == [128, 0]
; run: %uaddof_i16(32000, 767) == [32767, 0]
; run: %uaddof_i16(32000, 768) == [-32768, 0]
; run: %uaddof_i16(65000, 535) == [65535, 0]
; run: %uaddof_i16(65000, 536) == [0, 1]

0 commit comments

Comments
 (0)