Skip to content

Commit c934058

Browse files
authored
riscv: implement ISLE lowering for uadd_overflow.i64 (#11540) (#11583)
* riscv: implement ISLE lowering for uadd_overflow.i64 (#11540) * fix to correct ISLE lowering for the 32-bit case * riscv: space fix for better formatting (#11540) * riscv: added riscv64 to the target list (#11540) * riscv: separated narrow tests (#11540) * riscv: fix for i128 (#11540) * riscv: fix for i128 (#11540) * riscv: fix for i128 (#11540)
1 parent 3fe9c3c commit c934058

File tree

3 files changed

+64
-24
lines changed

3 files changed

+64
-24
lines changed

cranelift/codegen/src/isa/riscv64/lower.isle

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,37 @@
320320
(_ InstOutput (gen_trapif (IntCC.UnsignedLessThan) tmp x tc)))
321321
tmp))
322322

;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; For i64: unsigned addition overflowed iff the wrapped sum is less than
;; either operand, so `sum <u x` is exactly the carry out of bit 63.
(rule 0 (lower (has_type $I64 (uadd_overflow x y)))
      (let ((sum XReg (rv_add x y))
            (overflow XReg (rv_sltu sum x)))
        (output_pair sum overflow)))

;; For i32 on RV64: `addw` produces the 32-bit sum sign-extended to 64 bits.
;; Sign-extend `x` as well before the unsigned comparison: sign-extension is
;; strictly monotonic with respect to unsigned 32-bit order, so
;; `sltu (sext sum32) (sext x32)` equals `sum32 <u x32`, which is the carry
;; out of bit 31. Comparing against a zero-extended `x` instead would be
;; wrong whenever both the sum and `x` have bit 31 set (e.g. x=0xFFFF_FFFF,
;; y=0x9000_0000 overflows, but sext(sum) >u zext(x) would report 0).
(rule 1 (lower (has_type $I32 (uadd_overflow x y)))
      (let ((x64 XReg (sext x))
            (sum XReg (rv_addw x y))
            (overflow XReg (rv_sltu sum x64)))
        (output_pair sum overflow)))

;; For i128, add the low and high halves separately and propagate the carry
;; out of the low half into the high half. The overall overflow is the carry
;; out of the high half:
;;   - `sum_hi_with_carry <u x_hi` catches the high addition wrapping, and
;;   - `carry & (sum_hi_with_carry == x_hi)` catches the one case the strict
;;     comparison misses: the incoming carry bumped the (wrapped) high sum
;;     up to exactly `x_hi` (including the full wrap-around to 0 == x_hi).
(rule 2 (lower (has_type $I128 (uadd_overflow x y)))
      (let ((x_regs ValueRegs x)
            (y_regs ValueRegs y)
            (x_lo XReg (value_regs_get x_regs 0))
            (x_hi XReg (value_regs_get x_regs 1))
            (y_lo XReg (value_regs_get y_regs 0))
            (y_hi XReg (value_regs_get y_regs 1))
            (sum_lo XReg (rv_add x_lo y_lo))
            (carry XReg (rv_sltu sum_lo x_lo))
            (sum_hi XReg (rv_add x_hi y_hi))
            (sum_hi_with_carry XReg (rv_add sum_hi carry))
            (overflow XReg (rv_or (rv_sltu sum_hi_with_carry x_hi)
                                  (rv_and carry (rv_seqz (rv_xor sum_hi_with_carry x_hi))))))
        (output_pair (value_regs sum_lo sum_hi_with_carry) overflow)))
323354
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
324355
;; Base case, simply subtracting things in registers.
325356

@@ -3099,4 +3130,4 @@
30993130
(y_pos VReg (rv_vmax_vx y (zero_reg) (unmasked) in_ty))
31003131
(x_clip VReg (rv_vnclipu_wi x_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))
31013132
(y_clip VReg (rv_vnclipu_wi y_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))))
3102-
(rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty)))
3133+
(rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty)))

cranelift/filetests/filetests/runtests/uadd_overflow.clif

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true
44
target aarch64
55
set enable_multi_ret_implicit_sret
66
target x86_64
7+
target riscv64
78

89
function %uaddof_i128(i128, i128) -> i128, i8 {
910
block0(v0: i128,v1: i128):
@@ -41,30 +42,7 @@ block0(v0: i64,v1: i64):
4142
; run: %uaddof_i64(0xA00A00A0_0A00A00A, 0x0BB0BB0B_B0BB0BB0) == [0xABBABBAB_BABBABBA, 0]
4243
; run: %uaddof_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0]
4344

44-
function %uaddof_i8(i8, i8) -> i8, i8 {
45-
block0(v0: i8, v1: i8):
46-
v2, v3 = uadd_overflow v0, v1
47-
return v2, v3
48-
}
49-
; run: %uaddof_i8(0, 1) == [1, 0]
50-
; run: %uaddof_i8(100, 27) == [127, 0]
51-
; run: %uaddof_i8(100, -20) == [80, 1]
52-
; run: %uaddof_i8(100, 28) == [-128, 0]
53-
; run: %uaddof_i8(-128, -128) == [0, 1]
54-
; run: %uaddof_i8(127, 1) == [0x80, 0]
5545

56-
function %uaddof_i16(i16, i16) -> i16, i8 {
57-
block0(v0: i16, v1: i16):
58-
v2, v3 = uadd_overflow v0, v1
59-
return v2, v3
60-
}
61-
; run: %uaddof_i16(0, 1) == [1, 0]
62-
; run: %uaddof_i16(100, 27) == [127, 0]
63-
; run: %uaddof_i16(100, 28) == [128, 0]
64-
; run: %uaddof_i16(32000, 767) == [32767, 0]
65-
; run: %uaddof_i16(32000, 768) == [-32768, 0]
66-
; run: %uaddof_i16(65000, 535) == [65535, 0]
67-
; run: %uaddof_i16(65000, 536) == [0, 1]
6846

6947
function %uaddof_i32(i32, i32) -> i32, i8 {
7048
block0(v0: i32, v1: i32):
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
set enable_multi_ret_implicit_sret
target x86_64

function %uaddof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
    v2, v3 = uadd_overflow v0, v1
    return v2, v3
}
; run: %uaddof_i8(0, 1) == [1, 0]
; run: %uaddof_i8(100, 27) == [127, 0]
; run: %uaddof_i8(100, -20) == [80, 1]
; run: %uaddof_i8(100, 28) == [-128, 0]
; run: %uaddof_i8(-128, -128) == [0, 1]
; run: %uaddof_i8(127, 1) == [0x80, 0]

function %uaddof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
    v2, v3 = uadd_overflow v0, v1
    return v2, v3
}
; run: %uaddof_i16(0, 1) == [1, 0]
; run: %uaddof_i16(100, 27) == [127, 0]
; run: %uaddof_i16(100, 28) == [128, 0]
; run: %uaddof_i16(32000, 767) == [32767, 0]
; run: %uaddof_i16(32000, 768) == [-32768, 0]
; run: %uaddof_i16(65000, 535) == [65535, 0]
; run: %uaddof_i16(65000, 536) == [0, 1]

0 commit comments

Comments
 (0)