Skip to content

Commit 44a9c22

Browse files
authored
Fix ISLE icmp optimization rules for vector inputs (#12335)
Closes #12328
1 parent 1cc0bcf commit 44a9c22

File tree

2 files changed

+63
-4
lines changed

2 files changed

+63
-4
lines changed

cranelift/codegen/src/opts/icmp.isle

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -313,10 +313,10 @@
313313
;; (x <u y) ^ (y <u x) is rewritten to (x != y), keeping the same result type `ty`.
(rule (simplify (bxor ty (ult ty x y) (ult ty y x))) (ne ty x y))
314314

315315
;; a < b && a > b = false
316-
(rule (simplify (band ty (sgt ty x y) (slt ty x y))) (iconst_u ty 0))
317-
(rule (simplify (band ty (slt ty x y) (sgt ty x y))) (iconst_u ty 0))
318-
(rule (simplify (band ty (ugt ty x y) (ult ty x y))) (iconst_u ty 0))
319-
(rule (simplify (band ty (ult ty x y) (ugt ty x y))) (iconst_u ty 0))
316+
;; Restrict these folds to scalar integer result types. `iconst_u` can only
;; materialize a scalar constant, and `fits_in_64` alone filters on total bit
;; width, so 64-bit vector types (e.g. i32x2, i16x4, i8x8) would still match.
;; `ty_int` additionally requires `ty` to be a scalar integer type.
(rule (simplify (band (fits_in_64 (ty_int ty)) (sgt ty x y) (slt ty x y))) (iconst_u ty 0))
317+
(rule (simplify (band (fits_in_64 (ty_int ty)) (slt ty x y) (sgt ty x y))) (iconst_u ty 0))
318+
(rule (simplify (band (fits_in_64 (ty_int ty)) (ugt ty x y) (ult ty x y))) (iconst_u ty 0))
319+
(rule (simplify (band (fits_in_64 (ty_int ty)) (ult ty x y) (ugt ty x y))) (iconst_u ty 0))
320320
(rule
321321
(simplify (band ty (sgt ty x (iconst_s _ y)) (ult ty x (iconst_s _ y))))
322322
(if-let true (i64_gt_eq y 0))
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
test optimize
2+
set opt_level=speed
3+
target x86_64
4+
5+
; i32x4: band of `icmp sgt v0, v1` with `icmp slt v0, v1` (same operands,
; opposite strict signed comparisons, sgt first).
; NOTE(review): no filecheck directives are visible in this file; presumably the
; test only checks that optimization completes without error. Confirm.
function %sgt_slt_i32x4(i32x4, i32x4) -> i32x4 {
6+
block0(v0: i32x4, v1: i32x4):
7+
v2 = icmp sgt v0, v1
8+
v3 = icmp slt v0, v1
9+
v4 = band v2, v3
10+
return v4
11+
}
12+
13+
; i32x4: band of `icmp slt v0, v1` with `icmp sgt v0, v1` (same operands,
; opposite strict signed comparisons, slt first).
function %slt_sgt_i32x4(i32x4, i32x4) -> i32x4 {
14+
block0(v0: i32x4, v1: i32x4):
15+
v2 = icmp slt v0, v1
16+
v3 = icmp sgt v0, v1
17+
v4 = band v2, v3
18+
return v4
19+
}
20+
21+
; i32x4: band of `icmp ugt v0, v1` with `icmp ult v0, v1` (same operands,
; opposite strict unsigned comparisons, ugt first).
function %ugt_ult_i32x4(i32x4, i32x4) -> i32x4 {
22+
block0(v0: i32x4, v1: i32x4):
23+
v2 = icmp ugt v0, v1
24+
v3 = icmp ult v0, v1
25+
v4 = band v2, v3
26+
return v4
27+
}
28+
29+
; i32x4: band of `icmp ult v0, v1` with `icmp ugt v0, v1` (same operands,
; opposite strict unsigned comparisons, ult first).
function %ult_ugt_i32x4(i32x4, i32x4) -> i32x4 {
30+
block0(v0: i32x4, v1: i32x4):
31+
v2 = icmp ult v0, v1
32+
v3 = icmp ugt v0, v1
33+
v4 = band v2, v3
34+
return v4
35+
}
36+
37+
; i64x2 variant: band of `icmp sgt v0, v1` with `icmp slt v0, v1` on the same operands.
function %sgt_slt_i64x2(i64x2, i64x2) -> i64x2 {
38+
block0(v0: i64x2, v1: i64x2):
39+
v2 = icmp sgt v0, v1
40+
v3 = icmp slt v0, v1
41+
v4 = band v2, v3
42+
return v4
43+
}
44+
45+
; i16x8 variant: band of `icmp slt v0, v1` with `icmp sgt v0, v1` on the same operands.
function %slt_sgt_i16x8(i16x8, i16x8) -> i16x8 {
46+
block0(v0: i16x8, v1: i16x8):
47+
v2 = icmp slt v0, v1
48+
v3 = icmp sgt v0, v1
49+
v4 = band v2, v3
50+
return v4
51+
}
52+
53+
; i8x16 variant: band of `icmp ugt v0, v1` with `icmp ult v0, v1` on the same operands.
function %ugt_ult_i8x16(i8x16, i8x16) -> i8x16 {
54+
block0(v0: i8x16, v1: i8x16):
55+
v2 = icmp ugt v0, v1
56+
v3 = icmp ult v0, v1
57+
v4 = band v2, v3
58+
return v4
59+
}

0 commit comments

Comments
 (0)