From 07bb9ce9997e81c23fcfe321b24e969c209539b1 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 27 Sep 2025 11:07:06 +0100 Subject: [PATCH 01/11] Add optimizations for leading / trailing zeros --- cranelift/codegen/src/isle_prelude.rs | 10 +++++++ cranelift/codegen/src/opts/cprop.isle | 11 ++++++++ cranelift/codegen/src/prelude.isle | 4 +++ .../filetests/filetests/egraph/cprop.clif | 27 +++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 0b04f75a5b23..7dc08eeb943a 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -27,6 +27,16 @@ macro_rules! isle_common_prelude_methods { self.checked_add_with_type(ty, a, b).is_none() } + #[inline] + fn imm64_clz(&mut self, ty: Type, a: Imm64) -> Imm64 { + let bits = ty.bits(); + assert!(bits <= 64); + let clz_offset = 64 - bits; + let a_v: u64 = a.bits().cast_unsigned(); + let lz = a_v.leading_zeros() - clz_offset; + Imm64::new(lz as i64) + } + #[inline] fn imm64_sdiv(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option { // Sign extend `x` and `y`. diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index 2e9ea307ec0f..e5bbc4c001b1 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -1,5 +1,16 @@ ;; Constant propagation. +(rule (simplify + (clz (fits_in_64 ty) + (iconst ty kx))) + (subsume (iconst ty (imm64_clz ty kx)))) + + +(rule (simplify + (ctz (fits_in_64 ty) + (iconst ty (u64_from_imm64 kx)))) + (subsume (iconst ty (imm64_masked ty (u64_trailing_zeros kx))))) + (rule (simplify (iadd (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 20a61cde2e67..2b79bde864d5 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -96,6 +96,10 @@ (decl pure imm64_icmp (Type IntCC Imm64 Imm64) Imm64) (extern constructor imm64_icmp imm64_icmp) +(decl pure imm64_clz (Type Imm64) Imm64) +(extern constructor imm64_clz imm64_clz) + + ;; Each of these extractors tests whether the upper half of the input equals the ;; lower half of the input (decl u128_replicated_u64 (u64) u128) diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif index 87559027a07c..0921a0cc8193 100644 --- a/cranelift/filetests/filetests/egraph/cprop.clif +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -22,6 +22,33 @@ block0: ; check: v3 = iconst.i16 -2 ; nextln: return v3 +function %f0() -> i8 { +block0: + v1 = iconst.i8 51 + v2 = clz.i8 v1 + return v2 +} + +function %f0() -> i16 { +block0: + v1 = iconst.i16 51 + v2 = clz.i16 v1 + return v2 +} + +; check: v3 = iconst.i16 10 +; nextln: return v3 + +function %f0() -> i16 { +block0: + v1 = iconst.i16 48 + v2 = ctz.i16 v1 + return v2 +} + +; check: v3 = iconst.i16 4 +; nextln: return v3 + function %ishl() -> i8 { block0: v0 = iconst.i8 1 From 0d79d4a87a3419860385388cdf285a3a34b49767 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 27 Sep 2025 13:57:39 +0100 Subject: [PATCH 02/11] Add optimizations for srem --- cranelift/codegen/src/isle_prelude.rs | 22 +++++++++++++++++++ cranelift/codegen/src/opts/cprop.isle | 6 +++++ cranelift/codegen/src/prelude.isle | 3 +++ .../filetests/filetests/egraph/skeleton.clif | 16 +++++++++++++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 7dc08eeb943a..b8a768dcfe36 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -59,6 +59,28 @@ macro_rules! isle_common_prelude_methods { Some(Imm64::new(result)) } + #[inline] + fn imm64_srem(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option { + // Sign extend `x` and `y`. + let shift = u32::checked_sub(64, ty.bits()).unwrap_or(0); + let x = (x.bits() << shift) >> shift; + let y = (y.bits() << shift) >> shift; + + // NB: We can't rely on `checked_rem` to detect `ty::MIN / -1` + // (which overflows and should trap) because we are working with + // `i64` values here, and `i32::MIN != i64::MIN`, for + // example. Therefore, we have to explicitly check for this case + // ourselves. + let min = ((self.ty_smin(ty) as i64) << shift) >> shift; + if x == min && y == -1 { + return None; + } + + let ty_mask = self.ty_mask(ty) as i64; + let result = x.checked_rem(y)? & ty_mask; + Some(Imm64::new(result)) + } + #[inline] fn imm64_shl(&mut self, ty: Type, x: Imm64, y: Imm64) -> Imm64 { // Mask off any excess shift bits. diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index e5bbc4c001b1..98a59f680b51 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -35,6 +35,12 @@ (if-let d (imm64_sdiv ty k1 k2)) (iconst ty d)) +(rule (simplify_skeleton + (srem (iconst ty k1) + (iconst _ k2))) + (if-let d (imm64_srem ty k1 k2)) + (iconst ty d)) + (rule (simplify_skeleton (udiv (iconst_u ty k1) (iconst_u ty k2))) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 2b79bde864d5..46984aae88fc 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -73,6 +73,9 @@ (decl pure partial imm64_sdiv (Type Imm64 Imm64) Imm64) (extern constructor imm64_sdiv imm64_sdiv) +(decl pure partial imm64_srem (Type Imm64 Imm64) Imm64) +(extern constructor imm64_srem imm64_srem) + (decl pure imm64_shl (Type Imm64 Imm64) Imm64) (extern constructor imm64_shl imm64_shl) diff --git a/cranelift/filetests/filetests/egraph/skeleton.clif b/cranelift/filetests/filetests/egraph/skeleton.clif index 81289c74e886..b798da15f10e 100644 --- a/cranelift/filetests/filetests/egraph/skeleton.clif +++ b/cranelift/filetests/filetests/egraph/skeleton.clif @@ -110,6 +110,21 @@ block0: ; return v11 ; v11 = -1 ; } +function %cprop_srem() -> i32 { +block0: + v0 = iconst.i32 -17 + v1 = iconst.i32 7 + v2 = srem v0, v1 + return v2 +} + +; function %cprop_srem() -> i32 fast { +; block0: +; v28 = iconst.i32 -3 +; v2 -> v28 +; return v28 ; v28 = -3 +; } + function %udiv_by_one(i32) -> i32 { block0(v0: i32): v1 = iconst.i32 1 @@ -228,4 +243,3 @@ block0: ; v2 = uadd_overflow_trap v0, v1, user42 ; v0 = -1, v1 = 1 ; return v2 ; } - From ddf8b3a616933ba4c2d841ed64601abf11ed6e99 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 27 Sep 2025 13:59:55 +0100 Subject: [PATCH 03/11] Add optimizations for urem --- cranelift/codegen/src/opts/cprop.isle | 6 ++++++ .../filetests/filetests/egraph/skeleton.clif | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index 98a59f680b51..f2bcc6b2900e 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -47,6 +47,12 @@ (if-let d (u64_checked_div k1 k2)) (iconst ty (imm64_masked ty d))) +(rule (simplify_skeleton + (urem (iconst_u ty k1) + (iconst_u ty k2))) + (if-let d (u64_checked_rem k1 k2)) + (iconst ty (imm64_masked ty d))) + (rule (simplify (bor (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) diff --git a/cranelift/filetests/filetests/egraph/skeleton.clif b/cranelift/filetests/filetests/egraph/skeleton.clif index b798da15f10e..7f3c6cec8319 100644 --- a/cranelift/filetests/filetests/egraph/skeleton.clif +++ b/cranelift/filetests/filetests/egraph/skeleton.clif @@ -95,6 +95,21 @@ block0: ; return v18 ; v18 = 1 ; } +function %cprop_urem() -> i32 { +block0: + v0 = iconst.i32 13 + v1 = iconst.i32 7 + v2 = urem v0, v1 + return v2 +} + +; function %cprop_urem() -> i32 fast { +; block0: +; v37 = iconst.i32 6 +; v2 -> v37 +; return v37 ; v37 = 6 +; } + function %cprop_sdiv() -> i32 { block0: v0 = iconst.i32 -7 From ceaf8625d177469343da698771bb16d28437197e Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Mon, 29 Sep 2025 20:20:19 +0100 Subject: [PATCH 04/11] clean up --- cranelift/codegen/src/isle_prelude.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index b8a768dcfe36..0aabd60784a9 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -34,13 +34,14 @@ macro_rules! isle_common_prelude_methods { let clz_offset = 64 - bits; let a_v: u64 = a.bits().cast_unsigned(); let lz = a_v.leading_zeros() - clz_offset; - Imm64::new(lz as i64) + Imm64::new(i64::from(lz)) } #[inline] fn imm64_sdiv(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option { // Sign extend `x` and `y`. - let shift = u32::checked_sub(64, ty.bits()).unwrap_or(0); + assert!(ty.bits() <= 64); + let shift = 64 - ty.bits(); let x = (x.bits() << shift) >> shift; let y = (y.bits() << shift) >> shift; @@ -62,7 +63,8 @@ macro_rules! isle_common_prelude_methods { #[inline] fn imm64_srem(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option { // Sign extend `x` and `y`. - let shift = u32::checked_sub(64, ty.bits()).unwrap_or(0); + assert!(ty.bits() <= 64); + let shift = 64 - ty.bits(); let x = (x.bits() << shift) >> shift; let y = (y.bits() << shift) >> shift; From a36672d7bf1aad53b92bbe288c8bb59d71af1f42 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Mon, 29 Sep 2025 21:30:23 +0100 Subject: [PATCH 05/11] fixes --- cranelift/codegen/src/isle_prelude.rs | 30 ++++++------ cranelift/codegen/src/opts/arithmetic.isle | 3 -- cranelift/codegen/src/opts/cprop.isle | 4 +- .../filetests/filetests/egraph/skeleton.clif | 49 +++++++++++++++++++ 4 files changed, 66 insertions(+), 20 deletions(-) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 0aabd60784a9..292b60fde28a 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -40,10 +40,11 @@ macro_rules! isle_common_prelude_methods { #[inline] fn imm64_sdiv(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option { // Sign extend `x` and `y`. - assert!(ty.bits() <= 64); - let shift = 64 - ty.bits(); - let x = (x.bits() << shift) >> shift; - let y = (y.bits() << shift) >> shift; + let type_width = ty.bits(); + assert!(type_width <= 64); + let x = x.sign_extend_from_width(type_width).bits(); + let y = y.sign_extend_from_width(type_width).bits(); + let shift = 64 - type_width; // NB: We can't rely on `checked_div` to detect `ty::MIN / -1` // (which overflows and should trap) because we are working with @@ -55,20 +56,20 @@ macro_rules! isle_common_prelude_methods { return None; } - let ty_mask = self.ty_mask(ty) as i64; - let result = x.checked_div(y)? & ty_mask; - Some(Imm64::new(result)) + let result = x.checked_div(y)?; + Some(Imm64::new(result).mask_to_width(type_width)) } #[inline] fn imm64_srem(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option { // Sign extend `x` and `y`. - assert!(ty.bits() <= 64); - let shift = 64 - ty.bits(); - let x = (x.bits() << shift) >> shift; - let y = (y.bits() << shift) >> shift; + let type_width = ty.bits(); + assert!(type_width <= 64); + let x = x.sign_extend_from_width(type_width).bits(); + let y = y.sign_extend_from_width(type_width).bits(); + let shift = 64 - type_width; - // NB: We can't rely on `checked_rem` to detect `ty::MIN / -1` + // NB: We can't rely on `checked_div` to detect `ty::MIN / -1` // (which overflows and should trap) because we are working with // `i64` values here, and `i32::MIN != i64::MIN`, for // example. Therefore, we have to explicitly check for this case @@ -78,9 +79,8 @@ macro_rules! isle_common_prelude_methods { return None; } - let ty_mask = self.ty_mask(ty) as i64; - let result = x.checked_rem(y)? & ty_mask; - Some(Imm64::new(result)) + let result = x.checked_rem(y)?; + Some(Imm64::new(result).mask_to_width(type_width)) } #[inline] diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle index 48578730bc07..7327d0d079ec 100644 --- a/cranelift/codegen/src/opts/arithmetic.isle +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -125,7 +125,6 @@ ;; x % 1 == 0 (rule (simplify_skeleton (urem x (iconst_u ty 1))) (iconst_u ty 0)) (rule (simplify_skeleton (srem x (iconst_u ty 1))) (iconst_u ty 0)) -(rule (simplify_skeleton (srem x (iconst_s ty -1))) (iconst_u ty 0)) ;; Unsigned `x % d == x & ((1 << ilog2(d)) - 1)` when `d` is a power of two. (rule (simplify_skeleton (urem x (iconst_u ty (u64_extract_power_of_two d)))) @@ -339,5 +338,3 @@ ;; (x + y) - y --> x (rule (simplify (isub ty (iadd ty x y) x)) y) (rule (simplify (isub ty (iadd ty x y) y)) x) - - diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index f2bcc6b2900e..e19dc865d2d1 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -31,13 +31,13 @@ (rule (simplify_skeleton (sdiv (iconst ty k1) - (iconst _ k2))) + (iconst ty k2))) (if-let d (imm64_sdiv ty k1 k2)) (iconst ty d)) (rule (simplify_skeleton (srem (iconst ty k1) - (iconst _ k2))) + (iconst ty k2))) (if-let d (imm64_srem ty k1 k2)) (iconst ty d)) diff --git a/cranelift/filetests/filetests/egraph/skeleton.clif b/cranelift/filetests/filetests/egraph/skeleton.clif index 7f3c6cec8319..625d8e74639c 100644 --- a/cranelift/filetests/filetests/egraph/skeleton.clif +++ b/cranelift/filetests/filetests/egraph/skeleton.clif @@ -125,6 +125,54 @@ block0: ; return v11 ; v11 = -1 ; } +function %cprop_sdiv_i8_min() -> i8 { +block0: + v0 = iconst.i8 -128 + v1 = iconst.i8 -1 + v2 = sdiv v0, v1 + return v2 +} + +;function %cprop_sdiv_i8_min() -> i8 fast { +;block0: +; v0 = iconst.i8 -128 +; v1 = iconst.i8 -1 +; v2 = sdiv v0, v1 ; v0 = -128, v1 = -1 +; return v2 +;} + +function %cprop_srem_i8_min() -> i8 { +block0: + v0 = iconst.i8 -128 + v1 = iconst.i8 -1 + v2 = srem v0, v1 + return v2 +} + +;function %cprop_srem_i8_min() -> i8 fast { +;block0: +; v0 = iconst.i8 -128 +; v1 = iconst.i8 -1 +; v2 = srem v0, v1 ; v0 = -128, v1 = -1 +; return v2 +;} + +function %cprop_srem_i64_min() -> i64 { +block0: + v0 = iconst.i64 -9223372036854775808 + v1 = iconst.i64 -1 + v2 = srem v0, v1 + return v2 +} + +;function %cprop_srem_i64_min() -> i64 fast { +;block0: +; v0 = iconst.i64 -9223372036854775808 +; v1 = iconst.i64 -1 +; v2 = srem v0, v1 ; v0 = -9223372036854775808, v1 = -1 +; return v2 +;} + function %cprop_srem() -> i32 { block0: v0 = iconst.i32 -17 @@ -140,6 +188,7 @@ block0: ; return v28 ; v28 = -3 ; } + function %udiv_by_one(i32) -> i32 { block0(v0: i32): v1 = iconst.i32 1 From 7aadd9c17b720855325302158aa672c78b14cbb7 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 4 Oct 2025 10:52:56 +0100 Subject: [PATCH 06/11] fixes --- cranelift/codegen/src/isle_prelude.rs | 13 + cranelift/codegen/src/opts/cprop.isle | 8 +- cranelift/codegen/src/prelude.isle | 2 + .../filetests/filetests/egraph/cprop.clif | 10 + .../filetests/runtests/srem_opts.clif | 405 ++++++++++++++++++ 5 files changed, 434 insertions(+), 4 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/srem_opts.clif diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 292b60fde28a..525e2913b5b8 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -37,6 +37,19 @@ macro_rules! isle_common_prelude_methods { Imm64::new(i64::from(lz)) } + #[inline] + fn imm64_ctz(&mut self, ty: Type, a: Imm64) -> Imm64 { + let bits = ty.bits(); + assert!(bits <= 64); + let a_v: u64 = a.bits().cast_unsigned(); + if a_v == 0 { + // ctz(0) is defined to be the number of bits in the type. + return Imm64::new(i64::from(bits)); + } + let lz = a_v.trailing_zeros(); + Imm64::new(i64::from(lz)) + } + #[inline] fn imm64_sdiv(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option { // Sign extend `x` and `y`. diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index e19dc865d2d1..e6ec5c84db40 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -1,15 +1,15 @@ ;; Constant propagation. (rule (simplify - (clz (fits_in_64 ty) - (iconst ty kx))) + (clz (fits_in_64 ty) + (iconst ty kx))) (subsume (iconst ty (imm64_clz ty kx)))) (rule (simplify (ctz (fits_in_64 ty) - (iconst ty (u64_from_imm64 kx)))) - (subsume (iconst ty (imm64_masked ty (u64_trailing_zeros kx))))) + (iconst ty kx))) + (subsume (iconst ty (imm64_ctz ty kx)))) (rule (simplify (iadd (fits_in_64 ty) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 46984aae88fc..f6493c7987f7 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -102,6 +102,8 @@ (decl pure imm64_clz (Type Imm64) Imm64) (extern constructor imm64_clz imm64_clz) +(decl pure imm64_ctz (Type Imm64) Imm64) +(extern constructor imm64_ctz imm64_ctz) ;; Each of these extractors tests whether the upper half of the input equals the ;; lower half of the input diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif index 0921a0cc8193..fd3d88cb2202 100644 --- a/cranelift/filetests/filetests/egraph/cprop.clif +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -49,6 +49,16 @@ block0: ; check: v3 = iconst.i16 4 ; nextln: return v3 +function %f0() -> i16 { +block0: + v1 = iconst.i16 0 + v2 = ctz.i16 v1 + return v2 +} + +; check: v3 = iconst.i16 16 +; nextln: return v3 + function %ishl() -> i8 { block0: v0 = iconst.i8 1 diff --git a/cranelift/filetests/filetests/runtests/srem_opts.clif b/cranelift/filetests/filetests/runtests/srem_opts.clif new file mode 100644 index 000000000000..8ba1aa13a929 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/srem_opts.clif @@ -0,0 +1,405 @@ +test interpret +test run +set opt_level=none +target aarch64 +target x86_64 +target x86_64 has_avx +target s390x +target riscv64 +target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be + +set opt_level=speed +target aarch64 +target x86_64 +target x86_64 has_avx +target s390x +target riscv64 +target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be + + +function %srem_i64(i64, i64) -> i64 { +block0(v0: i64,v1: i64): + v2 = srem v0, v1 + return v2 +} +; run: %srem_i64(0, 1) == 0 +; run: %srem_i64(2, 2) == 0 +; run: %srem_i64(1, -1) == 0 +; run: %srem_i64(3, 2) == 1 +; run: %srem_i64(19, 7) == 5 +; run: %srem_i64(3, -2) == 1 +; run: %srem_i64(-19, 7) == -5 +; run: %srem_i64(-57, -5) == -2 +; run: %srem_i64(0, 104857600000) == 0 +; run: %srem_i64(104857600000, 511) == 398 +; run: %srem_i64(0xC0FFEEEE_DECAFFFF, 8) == -1 +; run: %srem_i64(0xC0FFEEEE_DECAFFFF, -8) == -1 +; run: %srem_i64(0x80000000_00000000, -2) == 0 + +function %srem_i32(i32, i32) -> i32 { +block0(v0: i32,v1: i32): + v2 = srem v0, v1 + return v2 +} +; run: %srem_i32(0, 1) == 0 +; run: %srem_i32(2, 2) == 0 +; run: %srem_i32(1, -1) == 0 +; run: %srem_i32(3, 2) == 1 +; run: %srem_i32(19, 7) == 5 +; run: %srem_i32(3, -2) == 1 +; run: %srem_i32(-19, 7) == -5 +; run: %srem_i32(0, 13) == 0 +; run: %srem_i32(1048576, 8192) == 0 +; run: %srem_i32(-1024, 255) == -4 +; run: %srem_i32(0xC0FFEEEE, 8) == -2 +; run: %srem_i32(0xC0FFEEEE, -8) == -2 +; run: %srem_i32(0x80000000, -2) == 0 + +function %srem_i16(i16, i16) -> i16 { +block0(v0: i16,v1: i16): + v2 = srem v0, v1 + return v2 +} +; run: %srem_i16(0, 1) == 0 +; run: %srem_i16(2, 2) == 0 +; run: %srem_i16(1, -1) == 0 +; run: %srem_i16(3, 2) == 1 +; run: %srem_i16(19, 7) == 5 +; run: %srem_i16(3, -2) == 1 +; run: %srem_i16(13, 5) == 3 +; run: %srem_i16(0, 42) == 0 +; run: %srem_i16(4, -2) == 0 +; run: %srem_i16(-19, 7) == -5 +; run: %srem_i16(0xC0FF, 8) == -1 +; run: %srem_i16(0xC0FF, -8) == -1 +; run: %srem_i16(0x8000, -2) == 0 + +function %srem_i8(i8, i8) -> i8 { +block0(v0: i8,v1: i8): + v2 = srem v0, v1 + return v2 +} +; run: %srem_i8(0, 1) == 0 +; run: %srem_i8(2, 2) == 0 +; run: %srem_i8(1, -1) == 0 +; run: %srem_i8(2, 7) == 2 +; run: %srem_i8(3, 2) == 1 +; run: %srem_i8(19, 7) == 5 +; run: %srem_i8(3, -2) == 1 +; run: %srem_i8(-19, 7) == -5 +; run: %srem_i8(0xC0, 8) == 0 +; run: %srem_i8(0xC0, -8) == 0 +; run: %srem_i8(0x80, -2) == 0 + + +function %srem_imm_i64(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 3 + return v1 +} +; run: %srem_imm_i64(0) == 0 +; run: %srem_imm_i64(1) == 1 +; run: %srem_imm_i64(2) == 2 +; run: %srem_imm_i64(3) == 0 +; run: %srem_imm_i64(19) == 1 +; run: %srem_imm_i64(-19) == -1 +; run: %srem_imm_i64(-57) == 0 +; run: %srem_imm_i64(104857600000) == 1 +; run: %srem_imm_i64(0xC0FFEEEE_DECAFFFF) == -1 +; run: %srem_imm_i64(0x80000000_00000000) == -2 + +function %srem_imm_i32(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 3 + return v1 +} +; run: %srem_imm_i32(0) == 0 +; run: %srem_imm_i32(1) == 1 +; run: %srem_imm_i32(2) == 2 +; run: %srem_imm_i32(3) == 0 +; run: %srem_imm_i32(4) == 1 +; run: %srem_imm_i32(19) == 1 +; run: %srem_imm_i32(-19) == -1 +; run: %srem_imm_i32(-42) == 0 +; run: %srem_imm_i32(1057) == 1 +; run: %srem_imm_i32(0xC0FFEEEE) == -2 + +function %srem_imm_i16(i16) -> i16 { +block0(v0: i16): + v1 = srem_imm v0, 3 + return v1 +} +; run: %srem_imm_i16(0) == 0 +; run: %srem_imm_i16(1) == 1 +; run: %srem_imm_i16(2) == 2 +; run: %srem_imm_i16(3) == 0 +; run: %srem_imm_i16(4) == 1 +; run: %srem_imm_i16(19) == 1 +; run: %srem_imm_i16(-19) == -1 +; run: %srem_imm_i16(0xC0FF) == -1 +; run: %srem_imm_i16(0x8000) == -2 + +function %srem_imm_i8(i8) -> i8 { +block0(v0: i8): + v1 = srem_imm v0, 3 + return v1 +} +; run: %srem_imm_i8(0) == 0 +; run: %srem_imm_i8(1) == 1 +; run: %srem_imm_i8(2) == 2 +; run: %srem_imm_i8(3) == 0 +; run: %srem_imm_i8(19) == 1 +; run: %srem_imm_i8(-19) == -1 +; run: %srem_imm_i8(0xC0) == -1 +; run: %srem_imm_i8(0x80) == -2 + +function %srem_with_bmask(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = bmask.i8 v0 + v3 = srem v2, v1 + return v3 +} +; run: %srem_with_bmask(4352, -1) == 0 +; run: %srem_with_bmask(4352, 1) == 0 + +; === Constant Propagation Tests (Guaranteed Optimization) === + +; Basic constant folding tests - these should be optimized to constants +function %const_srem_basic_i64() -> i64 { +block0: + v0 = iconst.i64 17 + v1 = iconst.i64 5 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_basic_i64() == 2 + +function %const_srem_negative_dividend_i64() -> i64 { +block0: + v0 = iconst.i64 -17 + v1 = iconst.i64 5 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_negative_dividend_i64() == -2 + +function %const_srem_negative_divisor_i64() -> i64 { +block0: + v0 = iconst.i64 17 + v1 = iconst.i64 -5 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_negative_divisor_i64() == 2 + +function %const_srem_both_negative_i64() -> i64 { +block0: + v0 = iconst.i64 -17 + v1 = iconst.i64 -5 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_both_negative_i64() == -2 + +; Power of 2 divisors - these should be optimized to bit operations +function %const_srem_pow2_8_i64() -> i64 { +block0: + v0 = iconst.i64 100 + v1 = iconst.i64 8 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_pow2_8_i64() == 4 + +function %const_srem_pow2_16_i64() -> i64 { +block0: + v0 = iconst.i64 100 + v1 = iconst.i64 16 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_pow2_16_i64() == 4 + +function %const_srem_pow2_negative_i64() -> i64 { +block0: + v0 = iconst.i64 -100 + v1 = iconst.i64 8 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_pow2_negative_i64() == -4 + +; Large constants that test overflow boundaries +function %const_srem_large_i64() -> i64 { +block0: + v0 = iconst.i64 0x7FFFFFFFFFFFFFFE ; i64::MAX - 1 + v1 = iconst.i64 0x7FFFFFFFFFFFFFFF ; i64::MAX + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_large_i64() == 0x7FFFFFFFFFFFFFFE + +; Test with 32-bit constants +function %const_srem_basic_i32() -> i32 { +block0: + v0 = iconst.i32 2147483647 ; i32::MAX + v1 = iconst.i32 1000000 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_basic_i32() == 483647 + +function %const_srem_pow2_i32() -> i32 { +block0: + v0 = iconst.i32 1000 + v1 = iconst.i32 64 + v2 = srem v0, v1 + return v2 +} +; run: %const_srem_pow2_i32() == 40 + +; Test immediate forms with guaranteed constant optimization +function %const_srem_imm_pow2_i64() -> i64 { +block0: + v0 = iconst.i64 1000 + v1 = srem_imm v0, 16 + return v1 +} +; run: %const_srem_imm_pow2_i64() == 8 + +function %const_srem_imm_non_pow2_i64() -> i64 { +block0: + v0 = iconst.i64 1000 + v1 = srem_imm v0, 7 + return v1 +} +; run: %const_srem_imm_non_pow2_i64() == 6 + +function %const_srem_imm_negative_i64() -> i64 { +block0: + v0 = iconst.i64 -1000 + v1 = srem_imm v0, 7 + return v1 +} +; run: %const_srem_imm_negative_i64() == -6 + +; === Additional Edge Cases === + +; Test zero dividend +function %srem_zero_dividend_i64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 0 + v2 = srem v1, v0 + return v2 +} +; run: %srem_zero_dividend_i64(5) == 0 +; run: %srem_zero_dividend_i64(-5) == 0 +; run: %srem_zero_dividend_i64(1) == 0 +; run: %srem_zero_dividend_i64(-1) == 0 + +; Test remainder that equals dividend (divisor > dividend) +function %srem_larger_divisor_i64() -> i64 { +block0: + v0 = iconst.i64 5 + v1 = iconst.i64 10 + v2 = srem v0, v1 + return v2 +} +; run: %srem_larger_divisor_i64() == 5 + +function %srem_larger_divisor_negative_i64() -> i64 { +block0: + v0 = iconst.i64 -5 + v1 = iconst.i64 10 + v2 = srem v0, v1 + return v2 +} +; run: %srem_larger_divisor_negative_i64() == -5 + +; Test powers of 2 with various patterns +function %srem_pow2_pattern_test_i64() -> i64 { +block0: + v0 = iconst.i64 -72340172838076674 ; + v1 = iconst.i64 256 + v2 = srem v0, v1 + return v2 +} +; run: %srem_pow2_pattern_test_i64() == -2 + +function %srem_pow2_pattern_test2_i64() -> i64 { +block0: + v0 = iconst.i64 -72340172838076673 ; + v1 = iconst.i64 256 + v2 = srem v0, v1 + return v2 +} +; run: %srem_pow2_pattern_test2_i64() == -1 + +; Test mixed sign edge cases +function %srem_mixed_signs_edge1_i64() -> i64 { +block0: + v0 = iconst.i64 -1 + v1 = iconst.i64 0x7FFFFFFFFFFFFFFF + v2 = srem v0, v1 + return v2 +} +; run: %srem_mixed_signs_edge1_i64() == -1 + +function %srem_mixed_signs_edge2_i64() -> i64 { +block0: + v0 = iconst.i64 0x7FFFFFFFFFFFFFFF + v1 = iconst.i64 -2 + v2 = srem v0, v1 + return v2 +} +; run: %srem_mixed_signs_edge2_i64() == 1 + + +function %srem_imm_pow2_more_i64(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 32 + return v1 +} +; run: %srem_imm_pow2_more_i64(100) == 4 +; run: %srem_imm_pow2_more_i64(-100) == -4 +; run: %srem_imm_pow2_more_i64(31) == 31 +; run: %srem_imm_pow2_more_i64(-31) == -31 + +function %srem_imm_pow2_more_i32(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 64 + return v1 +} +; run: %srem_imm_pow2_more_i32(100) == 36 +; run: %srem_imm_pow2_more_i32(-100) == -36 +; run: %srem_imm_pow2_more_i32(63) == 63 +; run: %srem_imm_pow2_more_i32(-63) == -63 + +; Test larger non-power-of-2 immediates +function %srem_imm_large_i64(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 1337 + return v1 +} +; run: %srem_imm_large_i64(10000) == 641 +; run: %srem_imm_large_i64(-10000) == -641 +; run: %srem_imm_large_i64(1336) == 1336 +; run: %srem_imm_large_i64(-1336) == -1336 + +function %srem_imm_prime_i64(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 97 + return v1 +} +; run: %srem_imm_prime_i64(1000) == 30 +; run: %srem_imm_prime_i64(-1000) == -30 +; run: %srem_imm_prime_i64(96) == 96 +; run: %srem_imm_prime_i64(-96) == -96 From 7a28ee19145dfb98a7d784933dd86ed653e612f5 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 4 Oct 2025 10:57:24 +0100 Subject: [PATCH 07/11] nicer rust --- cranelift/codegen/src/isle_prelude.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 525e2913b5b8..e4bf15cbf70f 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -44,10 +44,11 @@ macro_rules! isle_common_prelude_methods { let a_v: u64 = a.bits().cast_unsigned(); if a_v == 0 { // ctz(0) is defined to be the number of bits in the type. - return Imm64::new(i64::from(bits)); + Imm64::new(i64::from(bits)) + } else { + let lz = a_v.trailing_zeros(); + Imm64::new(i64::from(lz)) } - let lz = a_v.trailing_zeros(); - Imm64::new(i64::from(lz)) } #[inline] From 938539dcc5161751de3b0fdd178b205f156b9dac Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Tue, 7 Oct 2025 06:46:56 +0100 Subject: [PATCH 08/11] urem --- cranelift/codegen/src/opts/arithmetic.isle | 1 + 1 file changed, 1 insertion(+) diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle index 7327d0d079ec..6c391ff6617c 100644 --- a/cranelift/codegen/src/opts/arithmetic.isle +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -125,6 +125,7 @@ ;; x % 1 == 0 (rule (simplify_skeleton (urem x (iconst_u ty 1))) (iconst_u ty 0)) (rule (simplify_skeleton (srem x (iconst_u ty 1))) (iconst_u ty 0)) +(rule (simplify_skeleton (srem x (iconst_u ty -1))) (iconst_u ty 0)) ;; Unsigned `x % d == x & ((1 << ilog2(d)) - 1)` when `d` is a power of two. (rule (simplify_skeleton (urem x (iconst_u ty (u64_extract_power_of_two d)))) From c1f0674a6c0f5c09feadf8c5e47fbd88bea6f88d Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Tue, 7 Oct 2025 06:58:15 +0100 Subject: [PATCH 09/11] srem --- cranelift/codegen/src/opts/arithmetic.isle | 2 +- cranelift/filetests/filetests/egraph/skeleton.clif | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle index 6c391ff6617c..aba392058def 100644 --- a/cranelift/codegen/src/opts/arithmetic.isle +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -125,7 +125,7 @@ ;; x % 1 == 0 (rule (simplify_skeleton (urem x (iconst_u ty 1))) (iconst_u ty 0)) (rule (simplify_skeleton (srem x (iconst_u ty 1))) (iconst_u ty 0)) -(rule (simplify_skeleton (srem x (iconst_u ty -1))) (iconst_u ty 0)) +(rule (simplify_skeleton (srem x (iconst_s ty -1))) (iconst_u ty 0)) ;; Unsigned `x % d == x & ((1 << ilog2(d)) - 1)` when `d` is a power of two. (rule (simplify_skeleton (urem x (iconst_u ty (u64_extract_power_of_two d)))) diff --git a/cranelift/filetests/filetests/egraph/skeleton.clif b/cranelift/filetests/filetests/egraph/skeleton.clif index 625d8e74639c..a1c3c79f3a75 100644 --- a/cranelift/filetests/filetests/egraph/skeleton.clif +++ b/cranelift/filetests/filetests/egraph/skeleton.clif @@ -151,10 +151,9 @@ block0: ;function %cprop_srem_i8_min() -> i8 fast { ;block0: -; v0 = iconst.i8 -128 -; v1 = iconst.i8 -1 -; v2 = srem v0, v1 ; v0 = -128, v1 = -1 -; return v2 +; v3 = iconst.i8 0 +; v2 -> v3 +; return v3 ; v3 = 0 ;} function %cprop_srem_i64_min() -> i64 { @@ -167,10 +166,9 @@ block0: ;function %cprop_srem_i64_min() -> i64 fast { ;block0: -; v0 = iconst.i64 -9223372036854775808 -; v1 = iconst.i64 -1 -; v2 = srem v0, v1 ; v0 = -9223372036854775808, v1 = -1 -; return v2 +; v3 = iconst.i64 0 +; v2 -> v3 +; return v3 ; v3 = 0 ;} function %cprop_srem() -> i32 { From e21aa8c88b10339e25a30d080a4bbb2a364b53e0 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal <45569241+KGrewal1@users.noreply.github.com> Date: Tue, 7 Oct 2025 08:08:06 +0200 Subject: [PATCH 10/11] Remove srem special casing iN::min % -1 = 0 --- cranelift/codegen/src/isle_prelude.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index e4bf15cbf70f..648cdc452981 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -83,15 +83,8 @@ macro_rules! isle_common_prelude_methods { let y = y.sign_extend_from_width(type_width).bits(); let shift = 64 - type_width; - // NB: We can't rely on `checked_div` to detect `ty::MIN / -1` - // (which overflows and should trap) because we are working with - // `i64` values here, and `i32::MIN != i64::MIN`, for - // example. Therefore, we have to explicitly check for this case - // ourselves. - let min = ((self.ty_smin(ty) as i64) << shift) >> shift; - if x == min && y == -1 { - return None; - } + // iN::min % -1 is defined as 0 in wasm so no need + // to check for it let result = x.checked_rem(y)?; Some(Imm64::new(result).mask_to_width(type_width)) From 65d54bb7494f5ec7b4365aff13a1e2fc4c9ac192 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal <45569241+KGrewal1@users.noreply.github.com> Date: Tue, 7 Oct 2025 08:09:22 +0200 Subject: [PATCH 11/11] Remove unneeded variable shift --- cranelift/codegen/src/isle_prelude.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 648cdc452981..6a9a57db40bf 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -81,7 +81,6 @@ macro_rules! isle_common_prelude_methods { assert!(type_width <= 64); let x = x.sign_extend_from_width(type_width).bits(); let y = y.sign_extend_from_width(type_width).bits(); - let shift = 64 - type_width; // iN::min % -1 is defined as 0 in wasm so no need // to check for it