diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index da1b648c6967..80b47d2199a0 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -997,6 +997,10 @@ (rule (lower (has_type (vr128_ty ty) (bnot x))) (vec_not ty x)) +;; Float version using vector NOR. +(rule 5 (lower (has_type (ty_scalar_float _) (bnot x))) + (vec_not $F64X2 x)) + ;; With z15 (bnot (bxor ...)) can be a single instruction, similar to the ;; (bxor _ (bnot _)) lowering. (rule 3 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot (bxor x y)))) @@ -1033,6 +1037,10 @@ (rule 0 (lower (has_type (vr128_ty ty) (band x y))) (vec_and ty x y)) +;; And two float registers, using vector overlay. +(rule 11 (lower (has_type (ty_scalar_float _) (band x y))) + (vec_and $F64X2 x y)) + ;; Specialized lowerings for `(band x (bnot y))` which is additionally produced ;; by Cranelift's `band_not` instruction that is legalized into the simpler ;; forms early on. @@ -1075,6 +1083,10 @@ (rule 0 (lower (has_type (vr128_ty ty) (bor x y))) (vec_or ty x y)) +;; Or two float registers, using vector overlay. +(rule 11 (lower (has_type (ty_scalar_float _) (bor x y))) + (vec_or $F64X2 x y)) + ;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced ;; by Cranelift's `bor_not` instruction that is legalized into the simpler ;; forms early on. @@ -1114,6 +1126,10 @@ (rule 0 (lower (has_type (vr128_ty ty) (bxor x y))) (vec_xor ty x y)) +;; Xor two float registers, using vector overlay. +(rule 9 (lower (has_type (ty_scalar_float _) (bxor x y))) + (vec_xor $F64X2 x y)) + ;; Specialized lowerings for `(bxor x (bnot y))` which is additionally produced ;; by Cranelift's `bxor_not` instruction that is legalized into the simpler ;; forms early on. 
diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif index 3ee0a4480735..c9eab079996d 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -1076,3 +1076,67 @@ block0(v0: i32x4, v1: i32x4): ; vnx %v24, %v24, %v25 ; br %r14 +function %bnot_f64(f64) -> f64 { +block0(v0: f64): + v1 = bnot v0 + return v1 +} + +; VCode: +; block0: +; vno %v0, %v0, %v0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vno %v0, %v0, %v0 +; br %r14 + +function %band_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = band v0, v1 + return v2 +} + +; VCode: +; block0: +; vn %v0, %v0, %v2 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vn %v0, %v0, %v2 +; br %r14 + +function %bor_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = bor v0, v1 + return v2 +} + +; VCode: +; block0: +; vo %v0, %v0, %v2 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vo %v0, %v0, %v2 +; br %r14 + +function %bxor_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = bxor v0, v1 + return v2 +} + +; VCode: +; block0: +; vx %v0, %v0, %v2 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vx %v0, %v0, %v2 +; br %r14 + diff --git a/cranelift/filetests/filetests/runtests/fp-bitops.clif b/cranelift/filetests/filetests/runtests/fp-bitops.clif new file mode 100644 index 000000000000..e6d6ae65b458 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fp-bitops.clif @@ -0,0 +1,79 @@ +test interpret +test run +set opt_level=none +target s390x +target riscv64 +target riscv64 has_c has_zcb +target s390x has_mie3 +target x86_64 + +set opt_level=speed +target s390x +target riscv64 +target riscv64 has_c has_zcb +target s390x has_mie3 +target x86_64 + +function %test_bnot_f32(f32) -> f32 fast { +block0(v0: f32): + v2 = bnot v0 + return v2 +} + +; run: %test_bnot_f32(0x1.0) == -0x1.fffffep1 + +function %test_bnot_f64(f64) -> f64 fast { 
+block0(v0: f64): + v2 = bnot v0 + return v2 +} + +; run: %test_bnot_f64(0x1.0) == -0x1.fffffffffffffp1 + +function %test_band_f32(f32, f32) -> f32 fast { +block0(v0: f32, v1: f32): + v2 = band v0, v1 + return v2 +} + +; run: %test_band_f32(0x1.ff, 0x1.0ff) == 0x1.0fp0 + +function %test_band_f64(f64, f64) -> f64 fast { +block0(v0: f64, v1: f64): + v2 = band v0, v1 + return v2 +} + +; run: %test_band_f64(0x1.ff, 0x1.0ff) == 0x1.0fp0 + +function %test_bor_f32(f32, f32) -> f32 fast { +block0(v0: f32, v1: f32): + v2 = bor v0, v1 + return v2 +} + +; run: %test_bor_f32(0x1.ff, 0x1.0ff) == 0x1.fffp0 + +function %test_bor_f64(f64, f64) -> f64 fast { +block0(v0: f64, v1: f64): + v2 = bor v0, v1 + return v2 +} + +; run: %test_bor_f64(0x1.ff, 0x1.0ff) == 0x1.fffp0 + +function %test_bxor_f32(f32, f32) -> f32 fast { +block0(v0: f32, v1: f32): + v2 = bxor v0, v1 + return v2 +} + +; run: %test_bxor_f32(0x1.ff, 0x1.0ff) == 0x0.f0fp-126 + +function %test_bxor_f64(f64, f64) -> f64 fast { +block0(v0: f64, v1: f64): + v2 = bxor v0, v1 + return v2 +} + +; run: %test_bxor_f64(0x1.ff, 0x1.0ff) == 0x0.f0fp-1022