Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions cranelift/codegen/src/isle_prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,38 @@ macro_rules! isle_common_prelude_methods {
self.checked_add_with_type(ty, a, b).is_none()
}

#[inline]
fn imm64_clz(&mut self, ty: Type, a: Imm64) -> Imm64 {
let bits = ty.bits();
assert!(bits <= 64);
let clz_offset = 64 - bits;
let a_v: u64 = a.bits().cast_unsigned();
let lz = a_v.leading_zeros() - clz_offset;
Imm64::new(i64::from(lz))
}

#[inline]
fn imm64_ctz(&mut self, ty: Type, a: Imm64) -> Imm64 {
let bits = ty.bits();
assert!(bits <= 64);
let a_v: u64 = a.bits().cast_unsigned();
if a_v == 0 {
// ctz(0) is defined to be the number of bits in the type.
Imm64::new(i64::from(bits))
} else {
let lz = a_v.trailing_zeros();
Imm64::new(i64::from(lz))
}
}

#[inline]
fn imm64_sdiv(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option<Imm64> {
// Sign extend `x` and `y`.
let shift = u32::checked_sub(64, ty.bits()).unwrap_or(0);
let x = (x.bits() << shift) >> shift;
let y = (y.bits() << shift) >> shift;
let type_width = ty.bits();
assert!(type_width <= 64);
let x = x.sign_extend_from_width(type_width).bits();
let y = y.sign_extend_from_width(type_width).bits();
let shift = 64 - type_width;

// NB: We can't rely on `checked_div` to detect `ty::MIN / -1`
// (which overflows and should trap) because we are working with
Expand All @@ -44,9 +70,31 @@ macro_rules! isle_common_prelude_methods {
return None;
}

let ty_mask = self.ty_mask(ty) as i64;
let result = x.checked_div(y)? & ty_mask;
Some(Imm64::new(result))
let result = x.checked_div(y)?;
Some(Imm64::new(result).mask_to_width(type_width))
}

#[inline]
fn imm64_srem(&mut self, ty: Type, x: Imm64, y: Imm64) -> Option<Imm64> {
// Sign extend `x` and `y`.
let type_width = ty.bits();
assert!(type_width <= 64);
let x = x.sign_extend_from_width(type_width).bits();
let y = y.sign_extend_from_width(type_width).bits();
let shift = 64 - type_width;

// NB: We can't rely on `checked_div` to detect `ty::MIN / -1`
// (which overflows and should trap) because we are working with
// `i64` values here, and `i32::MIN != i64::MIN`, for
// example. Therefore, we have to explicitly check for this case
// ourselves.
let min = ((self.ty_smin(ty) as i64) << shift) >> shift;
if x == min && y == -1 {
return None;
}

let result = x.checked_rem(y)?;
Some(Imm64::new(result).mask_to_width(type_width))
}

#[inline]
Expand Down
3 changes: 0 additions & 3 deletions cranelift/codegen/src/opts/arithmetic.isle
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@
;; x % 1 == 0
(rule (simplify_skeleton (urem x (iconst_u ty 1))) (iconst_u ty 0))
(rule (simplify_skeleton (srem x (iconst_u ty 1))) (iconst_u ty 0))
(rule (simplify_skeleton (srem x (iconst_s ty -1))) (iconst_u ty 0))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How come this was removed? Was this an incorrect optimization?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - in combination with the urem const optimization led to optimizing imin rem -1 to 0 instead of having it trap which it should

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah that's actually a point where Rust and WebAssembly differ. Rust panicks on i32::MIN % -1 while WebAssembly returns 0. CLIF's semantics generally follow wasms, so I believe that this is still a correct optimization.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that sufficient for CLIF (I guess at the least could cause issues in the rust cranelift backend if this occurs)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really sure what you mean by your question?

Regardless this isn't something we can really change. Wasm i32.rem_s is directly translated to a CLIF srem instruction. The CLIF instruction needs to mach the wasm instruction as a result, where the semantics of wasm is that i32::MIN % -1 is 0.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@KGrewal1 I don't think we'll cause problems for cg_clif if we define srem to work with Wasm's semantics: we're defining it over a superset of the domain that Rust expects. Because other ISAs also work this way (e.g. aarch64 does not trap at all in divide/remainder operations), Rust already needs to reify its trapping semantics at a higher level; so the IR produced by cg_clif must necessarily contain the logic to panic on i32::MIN % -1, but that's already the case.


;; Unsigned `x % d == x & ((1 << ilog2(d)) - 1)` when `d` is a power of two.
(rule (simplify_skeleton (urem x (iconst_u ty (u64_extract_power_of_two d))))
Expand Down Expand Up @@ -339,5 +338,3 @@
;; (x + y) - y --> x
(rule (simplify (isub ty (iadd ty x y) x)) y)
(rule (simplify (isub ty (iadd ty x y) y)) x)


25 changes: 24 additions & 1 deletion cranelift/codegen/src/opts/cprop.isle
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
;; Constant propagation.

(rule (simplify
(clz (fits_in_64 ty)
(iconst ty kx)))
(subsume (iconst ty (imm64_clz ty kx))))


(rule (simplify
(ctz (fits_in_64 ty)
(iconst ty kx)))
(subsume (iconst ty (imm64_ctz ty kx))))

(rule (simplify
(iadd (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
Expand All @@ -20,16 +31,28 @@

(rule (simplify_skeleton
(sdiv (iconst ty k1)
(iconst _ k2)))
(iconst ty k2)))
(if-let d (imm64_sdiv ty k1 k2))
(iconst ty d))

(rule (simplify_skeleton
(srem (iconst ty k1)
(iconst ty k2)))
(if-let d (imm64_srem ty k1 k2))
(iconst ty d))

(rule (simplify_skeleton
(udiv (iconst_u ty k1)
(iconst_u ty k2)))
(if-let d (u64_checked_div k1 k2))
(iconst ty (imm64_masked ty d)))

(rule (simplify_skeleton
(urem (iconst_u ty k1)
(iconst_u ty k2)))
(if-let d (u64_checked_rem k1 k2))
(iconst ty (imm64_masked ty d)))

(rule (simplify
(bor (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/prelude.isle
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@
(decl pure partial imm64_sdiv (Type Imm64 Imm64) Imm64)
(extern constructor imm64_sdiv imm64_sdiv)

(decl pure partial imm64_srem (Type Imm64 Imm64) Imm64)
(extern constructor imm64_srem imm64_srem)

(decl pure imm64_shl (Type Imm64 Imm64) Imm64)
(extern constructor imm64_shl imm64_shl)

Expand All @@ -96,6 +99,12 @@
(decl pure imm64_icmp (Type IntCC Imm64 Imm64) Imm64)
(extern constructor imm64_icmp imm64_icmp)

(decl pure imm64_clz (Type Imm64) Imm64)
(extern constructor imm64_clz imm64_clz)

(decl pure imm64_ctz (Type Imm64) Imm64)
(extern constructor imm64_ctz imm64_ctz)

;; Each of these extractors tests whether the upper half of the input equals the
;; lower half of the input
(decl u128_replicated_u64 (u64) u128)
Expand Down
37 changes: 37 additions & 0 deletions cranelift/filetests/filetests/egraph/cprop.clif
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,43 @@ block0:
; check: v3 = iconst.i16 -2
; nextln: return v3

function %f0() -> i8 {
block0:
v1 = iconst.i8 51
v2 = clz.i8 v1
return v2
}

function %f0() -> i16 {
block0:
v1 = iconst.i16 51
v2 = clz.i16 v1
return v2
}

; check: v3 = iconst.i16 10
; nextln: return v3

function %f0() -> i16 {
block0:
v1 = iconst.i16 48
v2 = ctz.i16 v1
return v2
}

; check: v3 = iconst.i16 4
; nextln: return v3

function %f0() -> i16 {
block0:
v1 = iconst.i16 0
v2 = ctz.i16 v1
return v2
}

; check: v3 = iconst.i16 16
; nextln: return v3

function %ishl() -> i8 {
block0:
v0 = iconst.i8 1
Expand Down
80 changes: 79 additions & 1 deletion cranelift/filetests/filetests/egraph/skeleton.clif
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,21 @@ block0:
; return v18 ; v18 = 1
; }

function %cprop_urem() -> i32 {
block0:
v0 = iconst.i32 13
v1 = iconst.i32 7
v2 = urem v0, v1
return v2
}

; function %cprop_urem() -> i32 fast {
; block0:
; v37 = iconst.i32 6
; v2 -> v37
; return v37 ; v37 = 6
; }

function %cprop_sdiv() -> i32 {
block0:
v0 = iconst.i32 -7
Expand All @@ -110,6 +125,70 @@ block0:
; return v11 ; v11 = -1
; }

function %cprop_sdiv_i8_min() -> i8 {
block0:
v0 = iconst.i8 -128
v1 = iconst.i8 -1
v2 = sdiv v0, v1
return v2
}

;function %cprop_sdiv_i8_min() -> i8 fast {
;block0:
; v0 = iconst.i8 -128
; v1 = iconst.i8 -1
; v2 = sdiv v0, v1 ; v0 = -128, v1 = -1
; return v2
;}

function %cprop_srem_i8_min() -> i8 {
block0:
v0 = iconst.i8 -128
v1 = iconst.i8 -1
v2 = srem v0, v1
return v2
}

;function %cprop_srem_i8_min() -> i8 fast {
;block0:
; v0 = iconst.i8 -128
; v1 = iconst.i8 -1
; v2 = srem v0, v1 ; v0 = -128, v1 = -1
; return v2
;}

function %cprop_srem_i64_min() -> i64 {
block0:
v0 = iconst.i64 -9223372036854775808
v1 = iconst.i64 -1
v2 = srem v0, v1
return v2
}

;function %cprop_srem_i64_min() -> i64 fast {
;block0:
; v0 = iconst.i64 -9223372036854775808
; v1 = iconst.i64 -1
; v2 = srem v0, v1 ; v0 = -9223372036854775808, v1 = -1
; return v2
;}

function %cprop_srem() -> i32 {
block0:
v0 = iconst.i32 -17
v1 = iconst.i32 7
v2 = srem v0, v1
return v2
}

; function %cprop_srem() -> i32 fast {
; block0:
; v28 = iconst.i32 -3
; v2 -> v28
; return v28 ; v28 = -3
; }


function %udiv_by_one(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 1
Expand Down Expand Up @@ -228,4 +307,3 @@ block0:
; v2 = uadd_overflow_trap v0, v1, user42 ; v0 = -1, v1 = 1
; return v2
; }

Loading
Loading