Skip to content

Commit bfcf4e3

Browse files
authored
Implement bitwise ops for floats on aarch64 (#12326)
And also restrict some lowering rules like `(band x (bnot y))` to integer types. Fixes #12316
1 parent 54c1192 commit bfcf4e3

File tree

4 files changed

+165
-23
lines changed

4 files changed

+165
-23
lines changed

cranelift/codegen/src/isa/aarch64/lower.isle

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,13 +1351,17 @@
13511351
;; Note that bitwise negation is implemented here as
13521352
;;
13531353
;; NOT rd, rm ==> ORR_NOT rd, zero, rm
1354-
(rule bnot_base_case -3 (lower (has_type (fits_in_64 ty) (bnot x)))
1354+
(rule bnot_base_case -4 (lower (has_type (fits_in_64 (ty_int ty)) (bnot x)))
13551355
(orr_not ty (zero_reg) x))
13561356

1357+
;; Implementation of `bnot` for scalar float types that fit in 64 bits.
1358+
(rule -3 (lower (has_type (fits_in_64 (ty_scalar_float ty)) (bnot x)))
1359+
(not x (float_vector_size_in_64 ty)))
1360+
13571361
;; Implementation of `bnot` for vector types.
1358-
(rule -2 (lower (has_type (ty_vec64 ty) (bnot x)))
1362+
(rule -2 (lower (has_type (ty_vec64 ty) (bnot x)))
13591363
(not x (vector_size ty)))
1360-
(rule -1 (lower (has_type (ty_vec128 ty) (bnot x)))
1364+
(rule -1 (lower (has_type (ty_vec128 ty) (bnot x)))
13611365
(not x (vector_size ty)))
13621366

13631367
;; Implementation of `bnot` for `i128`.
@@ -1371,7 +1375,7 @@
13711375

13721376
;; Special case to use `orr_not_shift` if it's a `bnot` of a const-left-shifted
13731377
;; value.
1374-
(rule bnot_ishl 1 (lower (has_type (fits_in_64 ty)
1378+
(rule bnot_ishl 1 (lower (has_type (fits_in_64 (ty_int ty))
13751379
(bnot (ishl x (iconst k)))))
13761380
(if-let amt (lshl_from_imm64 ty k))
13771381
(orr_not_shift ty (zero_reg) x amt))
@@ -1383,26 +1387,26 @@
13831387

13841388
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13851389

1386-
(rule band_fits_in_64 -4 (lower (has_type (fits_in_64 (ty_int ty)) (band x y)))
1390+
(rule band_fits_in_64 -5 (lower (has_type (fits_in_64 (ty_int ty)) (band x y)))
13871391
(alu_rs_imm_logic_commutative (ALUOp.And) ty x y))
13881392

1389-
(rule -3 (lower (has_type (fits_in_64 (ty_scalar_float ty)) (band x y)))
1393+
(rule -4 (lower (has_type (fits_in_64 (ty_scalar_float ty)) (band x y)))
13901394
(and_vec x y (float_vector_size_in_64 ty)))
13911395

13921396
;; Implementation of `band` for vector types.
1393-
(rule -2 (lower (has_type (ty_vec64 ty) (band x y)))
1397+
(rule -2 (lower (has_type (ty_vec64 ty) (band x y)))
13941398
(and_vec x y (vector_size ty)))
1395-
(rule -1 (lower (has_type (ty_vec128 ty) (band x y)))
1399+
(rule -1 (lower (has_type (ty_vec128 ty) (band x y)))
13961400
(and_vec x y (vector_size ty)))
13971401

13981402
(rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And) $I64 x y))
13991403

14001404
;; Specialized lowerings for `(band x (bnot y))` which is additionally produced
14011405
;; by Cranelift's `band_not` instruction that is legalized into the simpler
14021406
;; forms early on.
1403-
(rule band_not_right 1 (lower (has_type (fits_in_64 ty) (band x (bnot y))))
1407+
(rule band_not_right 1 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y))))
14041408
(alu_rs_imm_logic (ALUOp.AndNot) ty x y))
1405-
(rule band_not_left 2 (lower (has_type (fits_in_64 ty) (band (bnot y) x)))
1409+
(rule band_not_left 2 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x)))
14061410
(alu_rs_imm_logic (ALUOp.AndNot) ty x y))
14071411

14081412
(rule 3 (lower (has_type $I128 (band x (bnot y)))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y))
@@ -1427,32 +1431,32 @@
14271431
(orr_vec x y (float_vector_size_in_64 ty)))
14281432

14291433
;; Implementation of `bor` for vector types.
1430-
(rule -2 (lower (has_type (ty_vec64 ty) (bor x y)))
1434+
(rule -2 (lower (has_type (ty_vec64 ty) (bor x y)))
14311435
(orr_vec x y (vector_size ty)))
1432-
(rule -1 (lower (has_type (ty_vec128 ty) (bor x y)))
1436+
(rule -1 (lower (has_type (ty_vec128 ty) (bor x y)))
14331437
(orr_vec x y (vector_size ty)))
14341438

14351439
(rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr) $I64 x y))
14361440

14371441
;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced
14381442
;; by Cranelift's `bor_not` instruction that is legalized into the simpler
14391443
;; forms early on.
1440-
(rule bor_not_right 1 (lower (has_type (fits_in_64 ty) (bor x (bnot y))))
1444+
(rule bor_not_right 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y))))
14411445
(alu_rs_imm_logic (ALUOp.OrrNot) ty x y))
1442-
(rule bor_not_left 2 (lower (has_type (fits_in_64 ty) (bor (bnot y) x)))
1446+
(rule bor_not_left 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x)))
14431447
(alu_rs_imm_logic (ALUOp.OrrNot) ty x y))
14441448

14451449
(rule 3 (lower (has_type $I128 (bor x (bnot y)))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y))
14461450
(rule 4 (lower (has_type $I128 (bor (bnot y) x))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y))
14471451

1448-
(rule bor_not_right_vec64 5 (lower (has_type (ty_vec64 ty) (bor x (bnot y))))
1452+
(rule bor_not_right_vec64 5 (lower (has_type (ty_vec64 ty) (bor x (bnot y))))
14491453
(orn_vec x y (vector_size ty)))
1450-
(rule bor_not_left_vec64 6 (lower (has_type (ty_vec64 ty) (bor (bnot y) x)))
1454+
(rule bor_not_left_vec64 6 (lower (has_type (ty_vec64 ty) (bor (bnot y) x)))
14511455
(orn_vec x y (vector_size ty)))
14521456

1453-
(rule bor_not_right_vec128 7 (lower (has_type (ty_vec128 ty) (bor x (bnot y))))
1457+
(rule bor_not_right_vec128 7 (lower (has_type (ty_vec128 ty) (bor x (bnot y))))
14541458
(orn_vec x y (vector_size ty)))
1455-
(rule bor_not_left_vec128 8 (lower (has_type (ty_vec128 ty) (bor (bnot y) x)))
1459+
(rule bor_not_left_vec128 8 (lower (has_type (ty_vec128 ty) (bor (bnot y) x)))
14561460
(orn_vec x y (vector_size ty)))
14571461

14581462

@@ -1481,13 +1485,13 @@
14811485
(rule bxor_fits_in_64 -4 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
14821486
(alu_rs_imm_logic_commutative (ALUOp.Eor) ty x y))
14831487

1484-
(rule -3 (lower (has_type (fits_in_64 (ty_scalar_float ty)) (bxor x y)))
1488+
(rule -3 (lower (has_type (fits_in_64 (ty_scalar_float ty)) (bxor x y)))
14851489
(eor_vec x y (float_vector_size_in_64 ty)))
14861490

14871491
;; Implementation of `bxor` for vector types.
1488-
(rule -2 (lower (has_type (ty_vec64 ty) (bxor x y)))
1492+
(rule -2 (lower (has_type (ty_vec64 ty) (bxor x y)))
14891493
(eor_vec x y (vector_size ty)))
1490-
(rule -1 (lower (has_type (ty_vec128 ty) (bxor x y)))
1494+
(rule -1 (lower (has_type (ty_vec128 ty) (bxor x y)))
14911495
(eor_vec x y (vector_size ty)))
14921496

14931497
(rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor) $I64 x y))
@@ -1496,9 +1500,9 @@
14961500
;; by Cranelift's `bxor_not` instruction that is legalized into the simpler
14971501
;; forms early on.
14981502

1499-
(rule bxor_not_right 1 (lower (has_type (fits_in_64 ty) (bxor x (bnot y))))
1503+
(rule bxor_not_right 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (bnot y))))
15001504
(alu_rs_imm_logic (ALUOp.EorNot) ty x y))
1501-
(rule bxor_not_left 2 (lower (has_type (fits_in_64 ty) (bxor (bnot y) x)))
1505+
(rule bxor_not_left 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (bnot y) x)))
15021506
(alu_rs_imm_logic (ALUOp.EorNot) ty x y))
15031507

15041508
(rule 3 (lower (has_type $I128 (bxor x (bnot y)))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y))

cranelift/filetests/filetests/isa/aarch64/bitops.clif

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1747,3 +1747,131 @@ block0(v0: i128, v1: i128):
17471747
; eon x1, x1, x3
17481748
; ret
17491749

1750+
function %bnot_of_f32(f32) -> f32 {
1751+
block0(v0: f32):
1752+
v1 = bnot v0
1753+
return v1
1754+
}
1755+
1756+
; VCode:
1757+
; block0:
1758+
; mvn v0.8b, v0.8b
1759+
; ret
1760+
;
1761+
; Disassembled:
1762+
; block0: ; offset 0x0
1763+
; mvn v0.8b, v0.8b
1764+
; ret
1765+
1766+
function %bnot_of_f64(f64) -> f64 {
1767+
block0(v0: f64):
1768+
v1 = bnot v0
1769+
return v1
1770+
}
1771+
1772+
; VCode:
1773+
; block0:
1774+
; mvn v0.8b, v0.8b
1775+
; ret
1776+
;
1777+
; Disassembled:
1778+
; block0: ; offset 0x0
1779+
; mvn v0.8b, v0.8b
1780+
; ret
1781+
1782+
function %band_f32(f32, f32) -> f32 {
1783+
block0(v0: f32, v1: f32):
1784+
v2 = band v0, v1
1785+
return v2
1786+
}
1787+
1788+
; VCode:
1789+
; block0:
1790+
; and v0.8b, v0.8b, v1.8b
1791+
; ret
1792+
;
1793+
; Disassembled:
1794+
; block0: ; offset 0x0
1795+
; and v0.8b, v0.8b, v1.8b
1796+
; ret
1797+
1798+
function %band_f64(f64, f64) -> f64 {
1799+
block0(v0: f64, v1: f64):
1800+
v2 = band v0, v1
1801+
return v2
1802+
}
1803+
1804+
; VCode:
1805+
; block0:
1806+
; and v0.8b, v0.8b, v1.8b
1807+
; ret
1808+
;
1809+
; Disassembled:
1810+
; block0: ; offset 0x0
1811+
; and v0.8b, v0.8b, v1.8b
1812+
; ret
1813+
1814+
function %bor_f32(f32, f32) -> f32 {
1815+
block0(v0: f32, v1: f32):
1816+
v2 = bor v0, v1
1817+
return v2
1818+
}
1819+
1820+
; VCode:
1821+
; block0:
1822+
; orr v0.8b, v0.8b, v1.8b
1823+
; ret
1824+
;
1825+
; Disassembled:
1826+
; block0: ; offset 0x0
1827+
; orr v0.8b, v0.8b, v1.8b
1828+
; ret
1829+
1830+
function %bor_f64(f64, f64) -> f64 {
1831+
block0(v0: f64, v1: f64):
1832+
v2 = bor v0, v1
1833+
return v2
1834+
}
1835+
1836+
; VCode:
1837+
; block0:
1838+
; orr v0.8b, v0.8b, v1.8b
1839+
; ret
1840+
;
1841+
; Disassembled:
1842+
; block0: ; offset 0x0
1843+
; orr v0.8b, v0.8b, v1.8b
1844+
; ret
1845+
1846+
function %bxor_f32(f32, f32) -> f32 {
1847+
block0(v0: f32, v1: f32):
1848+
v2 = bxor v0, v1
1849+
return v2
1850+
}
1851+
1852+
; VCode:
1853+
; block0:
1854+
; eor v0.8b, v0.8b, v1.8b
1855+
; ret
1856+
;
1857+
; Disassembled:
1858+
; block0: ; offset 0x0
1859+
; eor v0.8b, v0.8b, v1.8b
1860+
; ret
1861+
1862+
function %bxor_f64(f64, f64) -> f64 {
1863+
block0(v0: f64, v1: f64):
1864+
v2 = bxor v0, v1
1865+
return v2
1866+
}
1867+
1868+
; VCode:
1869+
; block0:
1870+
; eor v0.8b, v0.8b, v1.8b
1871+
; ret
1872+
;
1873+
; Disassembled:
1874+
; block0: ; offset 0x0
1875+
; eor v0.8b, v0.8b, v1.8b
1876+
; ret
1877+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
test compile
2+
target aarch64
3+
4+
function %band_not_f32(f32, f32) -> f32 {
5+
block0(v0: f32, v1: f32):
6+
v2 = bnot v1
7+
v3 = band v0, v2
8+
return v3
9+
}

cranelift/filetests/filetests/runtests/float-bitops.clif

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ target x86_64
44
target x86_64 has_avx
55
target riscv64
66
target riscv64 has_c has_zcb
7+
target aarch64
78

89
function %bnot_f32(f32) -> f32 {
910
block0(v0: f32):

0 commit comments

Comments
 (0)