Skip to content

Commit b5054fd

Browse files
committed
Implement wide-arithmetic for Winch
This commit implements the wide-arithmetic proposal for Winch on x64. This is mostly for me to get my feet wet doing things in Winch. The proposal itself is relatively modest with just four new instructions.
1 parent a40776c commit b5054fd

File tree

8 files changed

+238
-11
lines changed

8 files changed

+238
-11
lines changed

crates/fuzzing/src/generators/config.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,6 @@ impl WasmtimeConfig {
487487
config.threads_enabled = false;
488488
config.tail_call_enabled = false;
489489
config.reference_types_enabled = false;
490-
config.wide_arithmetic_enabled = false;
491490

492491
// Tuning the following engine options is currently not supported
493492
// by Winch.

tests/wast.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,6 @@ fn should_fail(test: &Path, strategy: Strategy) -> bool {
174174
"spec_testsuite/simd_store32_lane.wast",
175175
"spec_testsuite/simd_store64_lane.wast",
176176
"spec_testsuite/simd_store8_lane.wast",
177-
// wide arithmetic
178-
"misc_testsuite/wide-arithmetic.wast",
179177
];
180178

181179
if unsupported.iter().any(|part| test.ends_with(part)) {

winch/codegen/src/codegen/context.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,4 +590,22 @@ impl<'a> CodeGenContext<'a> {
590590
_ => {}
591591
});
592592
}
593+
594+
/// Prepares for emitting a binary operation where four 64-bit operands are
595+
/// used to produce two 64-bit results, e.g. a 128-bit binop.
596+
pub fn binop128<F, M>(&mut self, masm: &mut M, emit: F)
597+
where
598+
F: FnOnce(&mut M, Reg, Reg, Reg, Reg) -> (TypedReg, TypedReg),
599+
M: MacroAssembler,
600+
{
601+
let rhs_hi = self.pop_to_reg(masm, None);
602+
let rhs_lo = self.pop_to_reg(masm, None);
603+
let lhs_hi = self.pop_to_reg(masm, None);
604+
let lhs_lo = self.pop_to_reg(masm, None);
605+
let (lo, hi) = emit(masm, lhs_lo.reg, lhs_hi.reg, rhs_lo.reg, rhs_hi.reg);
606+
self.free_reg(rhs_hi);
607+
self.free_reg(rhs_lo);
608+
self.stack.push(lo.into());
609+
self.stack.push(hi.into());
610+
}
593611
}

winch/codegen/src/isa/aarch64/masm.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ use crate::{
55
isa::reg::{writable, Reg, WritableReg},
66
masm::{
77
CalleeKind, DivKind, ExtendKind, FloatCmpKind, Imm as I, IntCmpKind,
8-
MacroAssembler as Masm, OperandSize, RegImm, RemKind, RoundingMode, SPOffset, ShiftKind,
9-
StackSlot, TrapCode, TruncKind,
8+
MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, RemKind, RoundingMode, SPOffset,
9+
ShiftKind, StackSlot, TrapCode, TruncKind,
1010
},
1111
};
1212
use cranelift_codegen::{
@@ -673,6 +673,37 @@ impl Masm for MacroAssembler {
673673
fn current_code_offset(&self) -> CodeOffset {
674674
self.asm.buffer().cur_offset()
675675
}
676+
677+
fn add128(
678+
&mut self,
679+
dst_lo: WritableReg,
680+
dst_hi: WritableReg,
681+
lhs_lo: Reg,
682+
lhs_hi: Reg,
683+
rhs_lo: Reg,
684+
rhs_hi: Reg,
685+
) {
686+
let _ = (dst_lo, dst_hi, lhs_lo, lhs_hi, rhs_lo, rhs_hi);
687+
todo!()
688+
}
689+
690+
fn sub128(
691+
&mut self,
692+
dst_lo: WritableReg,
693+
dst_hi: WritableReg,
694+
lhs_lo: Reg,
695+
lhs_hi: Reg,
696+
rhs_lo: Reg,
697+
rhs_hi: Reg,
698+
) {
699+
let _ = (dst_lo, dst_hi, lhs_lo, lhs_hi, rhs_lo, rhs_hi);
700+
todo!()
701+
}
702+
703+
fn mul_wide(&mut self, context: &mut CodeGenContext, kind: MulWideKind) {
704+
let _ = (context, kind);
705+
todo!()
706+
}
676707
}
677708

678709
impl MacroAssembler {

winch/codegen/src/isa/x64/asm.rs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
33
use crate::{
44
isa::reg::Reg,
5-
masm::{DivKind, ExtendKind, IntCmpKind, OperandSize, RemKind, RoundingMode, ShiftKind},
5+
masm::{
6+
DivKind, ExtendKind, IntCmpKind, MulWideKind, OperandSize, RemKind, RoundingMode, ShiftKind,
7+
},
68
};
79
use cranelift_codegen::{
810
ir::{
@@ -1363,6 +1365,45 @@ impl Assembler {
13631365
size: size.into(),
13641366
});
13651367
}
1368+
1369+
pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1370+
self.emit(Inst::AluRmiR {
1371+
size: size.into(),
1372+
op: AluRmiROpcode::Adc,
1373+
src1: dst.to_reg().into(),
1374+
src2: src.into(),
1375+
dst: dst.map(Into::into),
1376+
});
1377+
}
1378+
1379+
pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1380+
self.emit(Inst::AluRmiR {
1381+
size: size.into(),
1382+
op: AluRmiROpcode::Sbb,
1383+
src1: dst.to_reg().into(),
1384+
src2: src.into(),
1385+
dst: dst.map(Into::into),
1386+
});
1387+
}
1388+
1389+
pub fn mul_wide(
1390+
&mut self,
1391+
dst_lo: WritableReg,
1392+
dst_hi: WritableReg,
1393+
lhs: Reg,
1394+
rhs: Reg,
1395+
kind: MulWideKind,
1396+
size: OperandSize,
1397+
) {
1398+
self.emit(Inst::Mul {
1399+
signed: kind == MulWideKind::Signed,
1400+
size: size.into(),
1401+
src1: lhs.into(),
1402+
src2: rhs.into(),
1403+
dst_lo: dst_lo.to_reg().into(),
1404+
dst_hi: dst_hi.to_reg().into(),
1405+
});
1406+
}
13661407
}
13671408

13681409
/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,

winch/codegen/src/isa/x64/masm.rs

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ use super::{
66
};
77

88
use crate::masm::{
9-
DivKind, ExtendKind, FloatCmpKind, Imm as I, IntCmpKind, MacroAssembler as Masm, OperandSize,
10-
RegImm, RemKind, RoundingMode, ShiftKind, TrapCode, TruncKind, TRUSTED_FLAGS, UNTRUSTED_FLAGS,
9+
DivKind, ExtendKind, FloatCmpKind, Imm as I, IntCmpKind, MacroAssembler as Masm, MulWideKind,
10+
OperandSize, RegImm, RemKind, RoundingMode, ShiftKind, TrapCode, TruncKind, TRUSTED_FLAGS,
11+
UNTRUSTED_FLAGS,
1112
};
1213
use crate::{
1314
abi::{self, align_to, calculate_frame_adjustment, LocalSlot},
1415
codegen::{ptr_type_from_ptr_size, CodeGenContext, FuncEnv},
15-
stack::Val,
16+
stack::{TypedReg, Val},
1617
};
1718
use crate::{
1819
abi::{vmctx, ABI},
@@ -996,6 +997,68 @@ impl Masm for MacroAssembler {
996997
fn current_code_offset(&self) -> CodeOffset {
997998
self.asm.buffer().cur_offset()
998999
}
1000+
1001+
fn add128(
1002+
&mut self,
1003+
dst_lo: WritableReg,
1004+
dst_hi: WritableReg,
1005+
lhs_lo: Reg,
1006+
lhs_hi: Reg,
1007+
rhs_lo: Reg,
1008+
rhs_hi: Reg,
1009+
) {
1010+
Self::ensure_two_argument_form(&dst_lo.to_reg(), &lhs_lo);
1011+
Self::ensure_two_argument_form(&dst_hi.to_reg(), &lhs_hi);
1012+
self.asm.add_rr(rhs_lo, dst_lo, OperandSize::S64);
1013+
self.asm.adc_rr(rhs_hi, dst_hi, OperandSize::S64);
1014+
}
1015+
1016+
fn sub128(
1017+
&mut self,
1018+
dst_lo: WritableReg,
1019+
dst_hi: WritableReg,
1020+
lhs_lo: Reg,
1021+
lhs_hi: Reg,
1022+
rhs_lo: Reg,
1023+
rhs_hi: Reg,
1024+
) {
1025+
Self::ensure_two_argument_form(&dst_lo.to_reg(), &lhs_lo);
1026+
Self::ensure_two_argument_form(&dst_hi.to_reg(), &lhs_hi);
1027+
self.asm.sub_rr(rhs_lo, dst_lo, OperandSize::S64);
1028+
self.asm.sbb_rr(rhs_hi, dst_hi, OperandSize::S64);
1029+
}
1030+
1031+
fn mul_wide(&mut self, context: &mut CodeGenContext, kind: MulWideKind) {
1032+
// Reserve rax/rdx since they're required by the `mul_wide` instruction
1033+
// being used here.
1034+
let rax = context.reg(regs::rax(), self);
1035+
let rdx = context.reg(regs::rdx(), self);
1036+
1037+
// The rhs of this binop can be in any register
1038+
let rhs = context.pop_to_reg(self, None);
1039+
// Mark rax as allocatable, and then force the lhs operand to be placed
1040+
// in `rax`.
1041+
context.free_reg(rax);
1042+
let lhs = context.pop_to_reg(self, Some(rax));
1043+
1044+
self.asm.mul_wide(
1045+
writable!(rax),
1046+
writable!(rdx),
1047+
lhs.reg,
1048+
rhs.reg,
1049+
kind,
1050+
OperandSize::S64,
1051+
);
1052+
1053+
// No longer using the rhs register after the multiplication has been
1054+
// executed.
1055+
context.free_reg(rhs);
1056+
1057+
// The low bits of the result are in rax, which is where `lhs` was allocated.
1058+
context.stack.push(lhs.into());
1059+
// The high bits of the result are in rdx, which we previously reserved.
1060+
context.stack.push(Val::Reg(TypedReg::i64(rdx)));
1061+
}
9991062
}
10001063

10011064
impl MacroAssembler {

winch/codegen/src/masm.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ pub(crate) enum RemKind {
2727
Unsigned,
2828
}
2929

30+
#[derive(Eq, PartialEq)]
31+
pub(crate) enum MulWideKind {
32+
Signed,
33+
Unsigned,
34+
}
35+
3036
/// The direction to perform the memory move.
3137
#[derive(Debug, Clone, Eq, PartialEq)]
3238
pub(crate) enum MemMoveDirection {
@@ -1019,4 +1025,33 @@ pub(crate) trait MacroAssembler {
10191025

10201026
/// The current offset, in bytes from the beginning of the function.
10211027
fn current_code_offset(&self) -> CodeOffset;
1028+
1029+
/// Performs a 128-bit addition
1030+
fn add128(
1031+
&mut self,
1032+
dst_lo: WritableReg,
1033+
dst_hi: WritableReg,
1034+
lhs_lo: Reg,
1035+
lhs_hi: Reg,
1036+
rhs_lo: Reg,
1037+
rhs_hi: Reg,
1038+
);
1039+
1040+
/// Performs a 128-bit subtraction
1041+
fn sub128(
1042+
&mut self,
1043+
dst_lo: WritableReg,
1044+
dst_hi: WritableReg,
1045+
lhs_lo: Reg,
1046+
lhs_hi: Reg,
1047+
rhs_lo: Reg,
1048+
rhs_hi: Reg,
1049+
);
1050+
1051+
/// Performs a widening multiplication from two 64-bit operands into a
1052+
/// 128-bit result.
1053+
///
1054+
/// Note that some platforms require special handling of registers in this
1055+
/// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
1056+
fn mul_wide(&mut self, context: &mut CodeGenContext, kind: MulWideKind);
10221057
}

winch/codegen/src/visitor.rs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
use crate::abi::RetArea;
88
use crate::codegen::{control_index, Callee, CodeGen, ControlStackFrame, FnCall};
99
use crate::masm::{
10-
DivKind, ExtendKind, FloatCmpKind, IntCmpKind, MacroAssembler, MemMoveDirection, OperandSize,
11-
RegImm, RemKind, RoundingMode, SPOffset, ShiftKind, TruncKind,
10+
DivKind, ExtendKind, FloatCmpKind, IntCmpKind, MacroAssembler, MemMoveDirection, MulWideKind,
11+
OperandSize, RegImm, RemKind, RoundingMode, SPOffset, ShiftKind, TruncKind,
1212
};
1313
use crate::reg::{writable, Reg};
1414
use crate::stack::{TypedReg, Val};
@@ -243,6 +243,10 @@ macro_rules! def_unsupported {
243243
(emit I64TruncSatF64U $($rest:tt)*) => {};
244244
(emit V128Load $($rest:tt)*) => {};
245245
(emit V128Store $($rest:tt)*) => {};
246+
(emit I64Add128 $($rest:tt)*) => {};
247+
(emit I64Sub128 $($rest:tt)*) => {};
248+
(emit I64MulWideS $($rest:tt)*) => {};
249+
(emit I64MulWideU $($rest:tt)*) => {};
246250

247251
(emit $unsupported:tt $($rest:tt)*) => {$($rest)*};
248252
}
@@ -2188,6 +2192,44 @@ where
21882192
);
21892193
}
21902194

2195+
fn visit_i64_add128(&mut self) {
2196+
self.context
2197+
.binop128(self.masm, |masm, lhs_lo, lhs_hi, rhs_lo, rhs_hi| {
2198+
masm.add128(
2199+
writable!(lhs_lo),
2200+
writable!(lhs_hi),
2201+
lhs_lo,
2202+
lhs_hi,
2203+
rhs_lo,
2204+
rhs_hi,
2205+
);
2206+
(TypedReg::i64(lhs_lo), TypedReg::i64(lhs_hi))
2207+
});
2208+
}
2209+
2210+
fn visit_i64_sub128(&mut self) {
2211+
self.context
2212+
.binop128(self.masm, |masm, lhs_lo, lhs_hi, rhs_lo, rhs_hi| {
2213+
masm.sub128(
2214+
writable!(lhs_lo),
2215+
writable!(lhs_hi),
2216+
lhs_lo,
2217+
lhs_hi,
2218+
rhs_lo,
2219+
rhs_hi,
2220+
);
2221+
(TypedReg::i64(lhs_lo), TypedReg::i64(lhs_hi))
2222+
});
2223+
}
2224+
2225+
fn visit_i64_mul_wide_s(&mut self) {
2226+
self.masm.mul_wide(&mut self.context, MulWideKind::Signed);
2227+
}
2228+
2229+
fn visit_i64_mul_wide_u(&mut self) {
2230+
self.masm.mul_wide(&mut self.context, MulWideKind::Unsigned);
2231+
}
2232+
21912233
wasmparser::for_each_operator!(def_unsupported);
21922234
}
21932235

0 commit comments

Comments
 (0)