From 7b0ab68c9309f631e8df04988fe79e9e11922cec Mon Sep 17 00:00:00 2001 From: Jimmy Brisson Date: Wed, 24 Dec 2025 09:26:27 -0600 Subject: [PATCH 1/2] s390x: Emit instructions for bitwise FP ops cranelift requires that bitwise operations work across all data types, including floating point. The prior implementation of bitwise operations, xor in this example, would cause a panic with the message: no rule matched for term aluop_xor This patch adds lowerings for bitwise operations on floating point registers using the vector instructions and the vector register overlay property of the s390x register file. --- cranelift/codegen/src/isa/s390x/lower.isle | 16 +++++ .../filetests/isa/s390x/bitwise.clif | 64 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index da1b648c6967..80b47d2199a0 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -997,6 +997,10 @@ (rule (lower (has_type (vr128_ty ty) (bnot x))) (vec_not ty x)) +;; Scalar float version: FPRs overlay the vector registers, so use vector NOR. +(rule 5 (lower (has_type (ty_scalar_float _) (bnot x))) + (vec_not $F64X2 x)) + ;; With z15 (bnot (bxor ...)) can be a single instruction, similar to the ;; (bxor _ (bnot _)) lowering. (rule 3 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot (bxor x y)))) @@ -1033,6 +1037,10 @@ (rule 0 (lower (has_type (vr128_ty ty) (band x y))) (vec_and ty x y)) +;; Bitwise AND of two scalar floats, using the vector register overlay. +(rule 11 (lower (has_type (ty_scalar_float _) (band x y))) + (vec_and $F64X2 x y)) + ;; Specialized lowerings for `(band x (bnot y))` which is additionally produced ;; by Cranelift's `band_not` instruction that is legalized into the simpler ;; forms early on. 
@@ -1075,6 +1083,10 @@ (rule 0 (lower (has_type (vr128_ty ty) (bor x y))) (vec_or ty x y)) +;; Bitwise OR of two scalar floats, using the vector register overlay. +(rule 11 (lower (has_type (ty_scalar_float _) (bor x y))) + (vec_or $F64X2 x y)) + ;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced ;; by Cranelift's `bor_not` instruction that is legalized into the simpler ;; forms early on. @@ -1114,6 +1126,10 @@ (rule 0 (lower (has_type (vr128_ty ty) (bxor x y))) (vec_xor ty x y)) +;; Bitwise XOR of two scalar floats, using the vector register overlay. +(rule 9 (lower (has_type (ty_scalar_float _) (bxor x y))) + (vec_xor $F64X2 x y)) + ;; Specialized lowerings for `(bxor x (bnot y))` which is additionally produced ;; by Cranelift's `bxor_not` instruction that is legalized into the simpler ;; forms early on. diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif index 3ee0a4480735..c9eab079996d 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -1076,3 +1076,67 @@ block0(v0: i32x4, v1: i32x4): ; vnx %v24, %v24, %v25 ; br %r14 +function %bnot_f64(f64) -> f64 { +block0(v0: f64): + v1 = bnot v0 + return v1 +} + +; VCode: +; block0: +; vno %v0, %v0, %v0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vno %v0, %v0, %v0 +; br %r14 + +function %band_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = band v0, v1 + return v2 +} + +; VCode: +; block0: +; vn %v0, %v0, %v2 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vn %v0, %v0, %v2 +; br %r14 + +function %bor_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = bor v0, v1 + return v2 +} + +; VCode: +; block0: +; vo %v0, %v0, %v2 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vo %v0, %v0, %v2 +; br %r14 + +function %bxor_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = bxor v0, v1 + return v2 +} + +; VCode: +; block0: +; vx %v0, %v0, %v2 +; br %r14 
+; +; Disassembled: +; block0: ; offset 0x0 +; vx %v0, %v0, %v2 +; br %r14 + From 67980be3ee5b2059abb19f9c36d57e30dbf49a45 Mon Sep 17 00:00:00 2001 From: Jimmy Brisson Date: Wed, 7 Jan 2026 11:19:07 -0600 Subject: [PATCH 2/2] Add test for bitops on fp pulley and aarch64 omitted as they currently fail this test --- .../filetests/runtests/fp-bitops.clif | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 cranelift/filetests/filetests/runtests/fp-bitops.clif diff --git a/cranelift/filetests/filetests/runtests/fp-bitops.clif b/cranelift/filetests/filetests/runtests/fp-bitops.clif new file mode 100644 index 000000000000..e6d6ae65b458 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fp-bitops.clif @@ -0,0 +1,79 @@ +test interpret +test run +set opt_level=none +target s390x +target riscv64 +target riscv64 has_c has_zcb +target s390x has_mie3 +target x86_64 + +set opt_level=speed +target s390x +target riscv64 +target riscv64 has_c has_zcb +target s390x has_mie3 +target x86_64 + +function %test_bnot_f32(f32) -> f32 fast { +block0(v0: f32): + v2 = bnot v0 + return v2 +} + +; run: %test_bnot_f32(0x1.0) == -0x1.fffffep1 + +function %test_bnot_f64(f64) -> f64 fast { +block0(v0: f64): + v2 = bnot v0 + return v2 +} + +; run: %test_bnot_f64(0x1.0) == -0x1.fffffffffffffp1 + +function %test_band_f32(f32, f32) -> f32 fast { +block0(v0: f32, v1: f32): + v2 = band v0, v1 + return v2 +} + +; run: %test_band_f32(0x1.ff, 0x1.0ff) == 0x1.0fp0 + +function %test_band_f64(f64, f64) -> f64 fast { +block0(v0: f64, v1: f64): + v2 = band v0, v1 + return v2 +} + +; run: %test_band_f64(0x1.ff, 0x1.0ff) == 0x1.0fp0 + +function %test_bor_f32(f32, f32) -> f32 fast { +block0(v0: f32, v1: f32): + v2 = bor v0, v1 + return v2 +} + +; run: %test_bor_f32(0x1.ff, 0x1.0ff) == 0x1.fffp0 + +function %test_bor_f64(f64, f64) -> f64 fast { +block0(v0: f64, v1: f64): + v2 = bor v0, v1 + return v2 +} + +; run: %test_bor_f64(0x1.ff, 0x1.0ff) == 0x1.fffp0 + +function 
%test_bxor_f32(f32, f32) -> f32 fast { +block0(v0: f32, v1: f32): + v2 = bxor v0, v1 + return v2 +} + +; run: %test_bxor_f32(0x1.ff, 0x1.0ff) == 0x0.f0fp-126 + +function %test_bxor_f64(f64, f64) -> f64 fast { +block0(v0: f64, v1: f64): + v2 = bxor v0, v1 + return v2 +} + +; run: %test_bxor_f64(0x1.ff, 0x1.0ff) == 0x0.f0fp-1022