Skip to content

Commit 99c5eb8

Browse files
authored
pulley: Implement some float simd ops (bytecodealliance#9869)
* pulley: Implement some float simd ops. Gets a few more wast tests passing. * Enable some cranelift runtests.
1 parent 68976ba commit 99c5eb8

File tree

6 files changed

+79
-4
lines changed

6 files changed

+79
-4
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,11 +1131,15 @@
11311131

11321132
;; Lowering rules for `fmax`, selected by result type: the scalar `$F32`/`$F64`
;; forms lower to `pulley_fmaximum32`/`pulley_fmaximum64`, and the vector
;; `$F32X4`/`$F64X2` forms lower to `pulley_vmaximumf32x4`/`pulley_vmaximumf64x2`.
;; Every rule passes both operands `a` and `b` through unchanged.
(rule (lower (has_type $F32 (fmax a b))) (pulley_fmaximum32 a b))
11331133
(rule (lower (has_type $F64 (fmax a b))) (pulley_fmaximum64 a b))
1134+
(rule (lower (has_type $F32X4 (fmax a b))) (pulley_vmaximumf32x4 a b))
1135+
(rule (lower (has_type $F64X2 (fmax a b))) (pulley_vmaximumf64x2 a b))
11341136

11351137
;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11361138

11371139
;; Lowering rules for `fmin`, selected by result type: the scalar `$F32`/`$F64`
;; forms lower to `pulley_fminimum32`/`pulley_fminimum64`, and the vector
;; `$F32X4`/`$F64X2` forms lower to `pulley_vminimumf32x4`/`pulley_vminimumf64x2`.
;; Every rule passes both operands `a` and `b` through unchanged.
(rule (lower (has_type $F32 (fmin a b))) (pulley_fminimum32 a b))
11381140
(rule (lower (has_type $F64 (fmin a b))) (pulley_fminimum64 a b))
1141+
(rule (lower (has_type $F32X4 (fmin a b))) (pulley_vminimumf32x4 a b))
1142+
(rule (lower (has_type $F64X2 (fmin a b))) (pulley_vminimumf64x2 a b))
11391143

11401144
;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11411145

@@ -1183,6 +1187,8 @@
11831187

11841188
;; Lowering rules for `fabs`, selected by result type: the scalar `$F32`/`$F64`
;; forms lower to `pulley_fabs32`/`pulley_fabs64`, and the vector
;; `$F32X4`/`$F64X2` forms lower to `pulley_vabsf32x4`/`pulley_vabsf64x2`.
;; Every rule passes the single operand `a` through unchanged.
(rule (lower (has_type $F32 (fabs a))) (pulley_fabs32 a))
11851189
(rule (lower (has_type $F64 (fabs a))) (pulley_fabs64 a))
1190+
(rule (lower (has_type $F32X4 (fabs a))) (pulley_vabsf32x4 a))
1191+
(rule (lower (has_type $F64X2 (fabs a))) (pulley_vabsf64x2 a))
11861192

11871193
;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11881194

cranelift/filetests/filetests/runtests/simd-fabs.clif

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ target x86_64
55
set enable_multi_ret_implicit_sret
66
target riscv64 has_v
77
target riscv64 has_v has_c has_zcb
8+
target pulley32
9+
target pulley32be
10+
target pulley64
11+
target pulley64be
812

913
function %fabs_f32x4(f32x4) -> f32x4 {
1014
block0(v0: f32x4):

cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 skylake
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
1115
block0(v0: f64x2, v1: f64x2):

crates/wast-util/src/lib.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -410,20 +410,16 @@ impl WastTest {
410410
"spec_testsuite/proposals/relaxed-simd/i8x16_relaxed_swizzle.wast",
411411
"spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast",
412412
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
413-
"spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
414413
"spec_testsuite/proposals/memory64/simd_lane.wast",
415-
"spec_testsuite/proposals/memory64/relaxed_min_max.wast",
416414
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
417415
"spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
418416
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
419417
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
420418
"spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
421-
"spec_testsuite/simd_f32x4.wast",
422419
"spec_testsuite/simd_f32x4_arith.wast",
423420
"spec_testsuite/simd_f32x4_cmp.wast",
424421
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
425422
"spec_testsuite/simd_f32x4_rounding.wast",
426-
"spec_testsuite/simd_f64x2.wast",
427423
"spec_testsuite/simd_f64x2_arith.wast",
428424
"spec_testsuite/simd_f64x2_cmp.wast",
429425
"spec_testsuite/simd_f64x2_pmin_pmax.wast",

pulley/src/interp.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4079,4 +4079,56 @@ impl ExtendedOpVisitor for Interpreter<'_> {
40794079
self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
40804080
ControlFlow::Continue(())
40814081
}
4082+
4083+
/// Interpreter handler for the `vabsf32x4` opcode (`dst = |src|`).
///
/// Reads vector register `src` as f32x4 lanes, applies `wasm_abs` to every
/// lane, and writes the resulting lanes to vector register `dst`.
///
/// Always returns `ControlFlow::Continue(())`.
fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4084+
let a = self.state[src].get_f32x4();
4085+
self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
4086+
ControlFlow::Continue(())
4087+
}
4088+
4089+
/// Interpreter handler for the `vabsf64x2` opcode (`dst = |src|`).
///
/// Reads vector register `src` as f64x2 lanes, applies `wasm_abs` to every
/// lane, and writes the resulting lanes to vector register `dst`.
///
/// Always returns `ControlFlow::Continue(())`.
fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4090+
let a = self.state[src].get_f64x2();
4091+
self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
4092+
ControlFlow::Continue(())
4093+
}
4094+
4095+
/// Interpreter handler for the `vmaximumf32x4` opcode
/// (`dst = ieee_maximum(src1, src2)`).
///
/// Reads `operands.src1` and `operands.src2` as f32x4 lanes, combines each
/// pair of corresponding lanes with `wasm_maximum`, and writes the result to
/// `operands.dst`.
///
/// Always returns `ControlFlow::Continue(())`.
fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4096+
let mut a = self.state[operands.src1].get_f32x4();
4097+
let b = self.state[operands.src2].get_f32x4();
4098+
// The loop bindings shadow the outer `a`/`b`: each `a` is a mutable lane of
// the local `src1` copy, updated in place, and each `b` is the matching lane
// of `src2`.
for (a, b) in a.iter_mut().zip(&b) {
4099+
*a = a.wasm_maximum(*b);
4100+
}
4101+
self.state[operands.dst].set_f32x4(a);
4102+
ControlFlow::Continue(())
4103+
}
4104+
4105+
/// Interpreter handler for the `vmaximumf64x2` opcode
/// (`dst = ieee_maximum(src1, src2)`).
///
/// Reads `operands.src1` and `operands.src2` as f64x2 lanes, combines each
/// pair of corresponding lanes with `wasm_maximum`, and writes the result to
/// `operands.dst`.
///
/// Always returns `ControlFlow::Continue(())`.
fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4106+
let mut a = self.state[operands.src1].get_f64x2();
4107+
let b = self.state[operands.src2].get_f64x2();
4108+
// The loop bindings shadow the outer `a`/`b`: each `a` is a mutable lane of
// the local `src1` copy, updated in place, and each `b` is the matching lane
// of `src2`.
for (a, b) in a.iter_mut().zip(&b) {
4109+
*a = a.wasm_maximum(*b);
4110+
}
4111+
self.state[operands.dst].set_f64x2(a);
4112+
ControlFlow::Continue(())
4113+
}
4114+
4115+
/// Interpreter handler for the `vminimumf32x4` opcode
/// (`dst = ieee_minimum(src1, src2)`).
///
/// Reads `operands.src1` and `operands.src2` as f32x4 lanes, combines each
/// pair of corresponding lanes with `wasm_minimum`, and writes the result to
/// `operands.dst`.
///
/// Always returns `ControlFlow::Continue(())`.
fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4116+
let mut a = self.state[operands.src1].get_f32x4();
4117+
let b = self.state[operands.src2].get_f32x4();
4118+
// The loop bindings shadow the outer `a`/`b`: each `a` is a mutable lane of
// the local `src1` copy, updated in place, and each `b` is the matching lane
// of `src2`.
for (a, b) in a.iter_mut().zip(&b) {
4119+
*a = a.wasm_minimum(*b);
4120+
}
4121+
self.state[operands.dst].set_f32x4(a);
4122+
ControlFlow::Continue(())
4123+
}
4124+
4125+
/// Interpreter handler for the `vminimumf64x2` opcode
/// (`dst = ieee_minimum(src1, src2)`).
///
/// Reads `operands.src1` and `operands.src2` as f64x2 lanes, combines each
/// pair of corresponding lanes with `wasm_minimum`, and writes the result to
/// `operands.dst`.
///
/// Always returns `ControlFlow::Continue(())`.
fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4126+
let mut a = self.state[operands.src1].get_f64x2();
4127+
let b = self.state[operands.src2].get_f64x2();
4128+
// The loop bindings shadow the outer `a`/`b`: each `a` is a mutable lane of
// the local `src1` copy, updated in place, and each `b` is the matching lane
// of `src2`.
for (a, b) in a.iter_mut().zip(&b) {
4129+
*a = a.wasm_minimum(*b);
4130+
}
4131+
self.state[operands.dst].set_f64x2(a);
4132+
ControlFlow::Continue(())
4133+
}
40824134
}

pulley/src/lib.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,19 @@ macro_rules! for_each_extended_op {
11161116
vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
11171117
/// `dst = -src`
11181118
vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };
1119+
1120+
/// `dst = |src|`
1121+
vabsf32x4 = Vabsf32x4 { dst: VReg, src: VReg };
1122+
/// `dst = |src|`
1123+
vabsf64x2 = Vabsf64x2 { dst: VReg, src: VReg };
1124+
/// `dst = ieee_maximum(src1, src2)`
1125+
vmaximumf32x4 = Vmaximumf32x4 { operands: BinaryOperands<VReg> };
1126+
/// `dst = ieee_maximum(src1, src2)`
1127+
vmaximumf64x2 = Vmaximumf64x2 { operands: BinaryOperands<VReg> };
1128+
/// `dst = ieee_minimum(src1, src2)`
1129+
vminimumf32x4 = Vminimumf32x4 { operands: BinaryOperands<VReg> };
1130+
/// `dst = ieee_minimum(src1, src2)`
1131+
vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands<VReg> };
11191132
}
11201133
};
11211134
}

0 commit comments

Comments
 (0)