diff --git a/cranelift/assembler-x64/meta/src/dsl.rs b/cranelift/assembler-x64/meta/src/dsl.rs index 34c708286d09..9ae2d0cfe749 100644 --- a/cranelift/assembler-x64/meta/src/dsl.rs +++ b/cranelift/assembler-x64/meta/src/dsl.rs @@ -13,8 +13,8 @@ pub use encoding::{ }; pub use encoding::{rex, vex}; pub use features::{ALL_FEATURES, Feature, Features}; -pub use format::{Extension, Format, Location, Mutability, Operand, OperandKind}; -pub use format::{align, fmt, r, rw, sxl, sxq, sxw, w}; +pub use format::{Extension, Format, Location, Mutability, Operand, OperandKind, RegClass}; +pub use format::{align, fmt, implicit, r, rw, sxl, sxq, sxw, w}; /// Abbreviated constructor for an x64 instruction. pub fn inst( diff --git a/cranelift/assembler-x64/meta/src/dsl/features.rs b/cranelift/assembler-x64/meta/src/dsl/features.rs index a30b9d866baf..124b74a1768b 100644 --- a/cranelift/assembler-x64/meta/src/dsl/features.rs +++ b/cranelift/assembler-x64/meta/src/dsl/features.rs @@ -65,6 +65,7 @@ pub enum Feature { sse, sse2, ssse3, + sse41, } /// List all CPU features. @@ -80,6 +81,7 @@ pub const ALL_FEATURES: &[Feature] = &[ Feature::sse, Feature::sse2, Feature::ssse3, + Feature::sse41, ]; impl fmt::Display for Feature { @@ -90,6 +92,7 @@ impl fmt::Display for Feature { Feature::sse => write!(f, "sse"), Feature::sse2 => write!(f, "sse2"), Feature::ssse3 => write!(f, "ssse3"), + Feature::sse41 => write!(f, "sse41"), } } } diff --git a/cranelift/assembler-x64/meta/src/dsl/format.rs b/cranelift/assembler-x64/meta/src/dsl/format.rs index 864bdc1ee83c..8cdf82805f91 100644 --- a/cranelift/assembler-x64/meta/src/dsl/format.rs +++ b/cranelift/assembler-x64/meta/src/dsl/format.rs @@ -34,13 +34,12 @@ pub fn fmt( /// This function panics if the location is an immediate (i.e., an immediate /// cannot be written to). #[must_use] -pub fn rw(location: Location) -> Operand { - assert!(!matches!(location.kind(), OperandKind::Imm(_))); +pub fn rw(op: impl Into) -> Operand { + let op = op.into(); + assert!(!matches!(op.location.kind(), OperandKind::Imm(_))); Operand { - location, mutability: Mutability::ReadWrite, - extension: Extension::default(), - align: false, + ..op } } @@ -54,12 +53,11 @@ pub fn r(op: impl Into) -> Operand { /// An abbreviated constructor for a "write" operand. #[must_use] -pub fn w(location: Location) -> Operand { +pub fn w(op: impl Into) -> Operand { + let op = op.into(); Operand { - location, mutability: Mutability::Write, - extension: Extension::None, - align: false, + ..op } } @@ -67,10 +65,18 @@ pub fn w(location: Location) -> Operand { pub fn align(location: Location) -> Operand { assert!(location.uses_memory()); Operand { - location, - mutability: Mutability::Read, - extension: Extension::None, align: true, + ..Operand::from(location) + } +} + +/// An abbreviated constructor for an operand that is used by the instruction +/// but not visible in its disassembly. +pub fn implicit(location: Location) -> Operand { + assert!(matches!(location.kind(), OperandKind::FixedReg(_))); + Operand { + implicit: true, + ..Operand::from(location) } } @@ -84,10 +90,8 @@ pub fn align(location: Location) -> Operand { pub fn sxq(location: Location) -> Operand { assert!(location.bits() <= 64); Operand { - location, - mutability: Mutability::Read, extension: Extension::SignExtendQuad, - align: false, + ..Operand::from(location) } } @@ -101,10 +105,8 @@ pub fn sxq(location: Location) -> Operand { pub fn sxl(location: Location) -> Operand { assert!(location.bits() <= 32); Operand { - location, - mutability: Mutability::Read, extension: Extension::SignExtendLong, - align: false, + ..Operand::from(location) } } @@ -118,10 +120,8 @@ pub fn sxl(location: Location) -> Operand { pub fn sxw(location: Location) -> Operand { assert!(location.bits() <= 16); Operand { - location, - mutability: Mutability::Read, extension: Extension::SignExtendWord, - align: false, + ..Operand::from(location) } } @@ -204,6 +204,9 @@ pub struct Operand { /// address used in the operand must align to the size of the operand (e.g., /// `m128` must be 16-byte aligned). pub align: bool, + /// Some register operands are implicit: that is, they do not appear in the + /// disassembled output even though they are used in the instruction. + pub implicit: bool, } impl core::fmt::Display for Operand { @@ -213,6 +216,7 @@ impl core::fmt::Display for Operand { mutability, extension, align, + implicit, } = self; write!(f, "{location}")?; let mut flags = vec![]; @@ -225,6 +229,9 @@ impl core::fmt::Display for Operand { if *align != false { flags.push("align".to_owned()); } + if *implicit { + flags.push("implicit".to_owned()); + } if !flags.is_empty() { write!(f, "[{}]", flags.join(","))?; } @@ -237,11 +244,13 @@ impl From for Operand { let mutability = Mutability::default(); let extension = Extension::default(); let align = false; + let implicit = false; Self { location, mutability, extension, align, + implicit, } } } @@ -270,6 +279,9 @@ pub enum Location { ax, eax, rax, + dx, + edx, + rdx, cl, // Immediate values. @@ -307,9 +319,9 @@ impl Location { use Location::*; match self { al | cl | imm8 | r8 | rm8 | m8 => 8, - ax | imm16 | r16 | rm16 | m16 => 16, - eax | imm32 | r32 | rm32 | m32 | xmm_m32 => 32, - rax | r64 | rm64 | m64 | xmm_m64 => 64, + ax | dx | imm16 | r16 | rm16 | m16 => 16, + eax | edx | imm32 | r32 | rm32 | m32 | xmm_m32 => 32, + rax | rdx | r64 | rm64 | m64 | xmm_m64 => 64, xmm | xmm_m128 => 128, } } @@ -325,7 +337,8 @@ impl Location { pub fn uses_memory(&self) -> bool { use Location::*; match self { - al | cl | ax | eax | rax | imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 | xmm => false, + al | ax | eax | rax | cl | dx | edx | rdx | imm8 | imm16 | imm32 | r8 | r16 | r32 + | r64 | xmm => false, rm8 | rm16 | rm32 | rm64 | xmm_m32 | xmm_m64 | xmm_m128 | m8 | m16 | m32 | m64 => true, } } @@ -337,8 +350,8 @@ impl Location { use Location::*; match self { imm8 | imm16 | imm32 => false, - al | ax | eax | rax | cl | r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 | xmm - | xmm_m32 | xmm_m64 | xmm_m128 | m8 | m16 | m32 | m64 => true, + al | ax | eax | rax | cl | dx | edx | rdx | r8 | r16 | r32 | r64 | rm8 | rm16 + | rm32 | rm64 | xmm | xmm_m32 | xmm_m64 | xmm_m128 | m8 | m16 | m32 | m64 => true, } } @@ -347,7 +360,7 @@ impl Location { pub fn kind(&self) -> OperandKind { use Location::*; match self { - al | ax | eax | rax | cl => OperandKind::FixedReg(*self), + al | ax | eax | rax | cl | dx | edx | rdx => OperandKind::FixedReg(*self), imm8 | imm16 | imm32 => OperandKind::Imm(*self), r8 | r16 | r32 | r64 | xmm => OperandKind::Reg(*self), rm8 | rm16 | rm32 | rm64 | xmm_m32 | xmm_m64 | xmm_m128 => OperandKind::RegMem(*self), @@ -364,9 +377,8 @@ impl Location { use Location::*; match self { imm8 | imm16 | imm32 | m8 | m16 | m32 | m64 => None, - al | ax | eax | rax | cl | r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 => { - Some(RegClass::Gpr) - } + al | ax | eax | rax | cl | dx | edx | rdx | r8 | r16 | r32 | r64 | rm8 | rm16 + | rm32 | rm64 => Some(RegClass::Gpr), xmm | xmm_m32 | xmm_m64 | xmm_m128 => Some(RegClass::Xmm), } } @@ -385,6 +397,9 @@ impl core::fmt::Display for Location { eax => write!(f, "eax"), rax => write!(f, "rax"), cl => write!(f, "cl"), + dx => write!(f, "dx"), + edx => write!(f, "edx"), + rdx => write!(f, "rdx"), r8 => write!(f, "r8"), r16 => write!(f, "r16"), diff --git a/cranelift/assembler-x64/meta/src/generate/format.rs b/cranelift/assembler-x64/meta/src/generate/format.rs index 014ec68aa63d..15b8964d3b51 100644 --- a/cranelift/assembler-x64/meta/src/generate/format.rs +++ b/cranelift/assembler-x64/meta/src/generate/format.rs @@ -14,17 +14,33 @@ impl dsl::Format { /// once Cranelift has switched to using this assembler predominantly /// (TODO). #[must_use] - pub fn generate_att_style_operands(&self) -> String { + pub(crate) fn generate_att_style_operands(&self) -> String { let ordered_ops: Vec<_> = self .operands .iter() + .filter(|o| !o.implicit) .rev() .map(|o| format!("{{{}}}", o.location)) .collect(); ordered_ops.join(", ") } - pub fn generate_rex_encoding(&self, f: &mut Formatter, rex: &dsl::Rex) { + #[must_use] + pub(crate) fn generate_implicit_operands(&self) -> String { + let ops: Vec<_> = self + .operands + .iter() + .filter(|o| o.implicit) + .map(|o| format!("{{{}}}", o.location)) + .collect(); + if ops.is_empty() { + String::new() + } else { + format!(" ;; implicit: {}", ops.join(", ")) + } + } + + pub(crate) fn generate_rex_encoding(&self, f: &mut Formatter, rex: &dsl::Rex) { self.generate_prefixes(f, rex); self.generate_rex_prefix(f, rex); self.generate_opcodes(f, rex); @@ -91,12 +107,17 @@ impl dsl::Format { fmtln!(f, "let dst = self.{dst}.enc();"); fmtln!(f, "let rex = RexPrefix::two_op(digit, dst, {bits});"); } + [FixedReg(_), RegMem(mem)] | [FixedReg(_), FixedReg(_), RegMem(mem)] => { + let digit = rex.digit.unwrap(); + fmtln!(f, "let digit = 0x{digit:x};"); + fmtln!(f, "let rex = self.{mem}.as_rex_prefix(digit, {bits});"); + } [Mem(dst), Imm(_)] | [RegMem(dst), Imm(_)] | [RegMem(dst)] => { let digit = rex.digit.unwrap(); fmtln!(f, "let digit = 0x{digit:x};"); fmtln!(f, "let rex = self.{dst}.as_rex_prefix(digit, {bits});"); } - [Reg(dst), RegMem(src)] => { + [Reg(dst), RegMem(src)] | [Reg(dst), RegMem(src), Imm(_)] => { fmtln!(f, "let dst = self.{dst}.enc();"); fmtln!(f, "let rex = self.{src}.as_rex_prefix(dst, {bits});"); } @@ -135,12 +156,17 @@ impl dsl::Format { fmtln!(f, "let digit = 0x{digit:x};"); fmtln!(f, "self.{reg}.encode_modrm(buf, digit);"); } - [Mem(mem), Imm(_)] | [RegMem(mem), Imm(_)] | [RegMem(mem)] => { + [Mem(mem), Imm(_)] + | [RegMem(mem), Imm(_)] + | [RegMem(mem)] + | [FixedReg(_), RegMem(mem)] + | [FixedReg(_), FixedReg(_), RegMem(mem)] => { let digit = rex.digit.unwrap(); fmtln!(f, "let digit = 0x{digit:x};"); fmtln!(f, "self.{mem}.encode_rex_suffixes(buf, off, digit, 0);"); } [Reg(reg), RegMem(mem)] + | [Reg(reg), RegMem(mem), Imm(_)] | [Mem(mem), Reg(reg)] | [RegMem(mem), Reg(reg)] | [RegMem(mem), Reg(reg), Imm(_)] diff --git a/cranelift/assembler-x64/meta/src/generate/inst.rs b/cranelift/assembler-x64/meta/src/generate/inst.rs index 2ef8a0dd5e4a..6f2af2917279 100644 --- a/cranelift/assembler-x64/meta/src/generate/inst.rs +++ b/cranelift/assembler-x64/meta/src/generate/inst.rs @@ -194,7 +194,7 @@ impl dsl::Inst { f.add_block( "fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result", |f| { - for op in &self.format.operands { + for op in self.format.operands.iter() { let location = op.location; let to_string = location.generate_to_string(op.extension); fmtln!(f, "let {location} = {to_string};"); @@ -207,7 +207,8 @@ impl dsl::Inst { &self.mnemonic }; let ordered_ops = self.format.generate_att_style_operands(); - fmtln!(f, "write!(f, \"{inst_name} {ordered_ops}\")"); + let implicit_ops = self.format.generate_implicit_operands(); + fmtln!(f, "write!(f, \"{inst_name} {ordered_ops}{implicit_ops}\")"); }, ); }, diff --git a/cranelift/assembler-x64/meta/src/generate/operand.rs b/cranelift/assembler-x64/meta/src/generate/operand.rs index fc050718b034..188126347d6a 100644 --- a/cranelift/assembler-x64/meta/src/generate/operand.rs +++ b/cranelift/assembler-x64/meta/src/generate/operand.rs @@ -14,10 +14,11 @@ impl dsl::Operand { format!("Imm{bits}") } } - al | ax | eax | rax | cl => { + al | ax | eax | rax | cl | dx | edx | rdx => { let enc = match self.location { al | ax | eax | rax => "{ gpr::enc::RAX }", cl => "{ gpr::enc::RCX }", + dx | edx | rdx => "{ gpr::enc::RDX }", _ => unreachable!(), }; format!("Fixed") @@ -44,6 +45,9 @@ impl dsl::Location { eax => "\"%eax\"".into(), rax => "\"%rax\"".into(), cl => "\"%cl\"".into(), + dx => "\"%dx\"".into(), + edx => "\"%edx\"".into(), + rdx => "\"%rdx\"".into(), imm8 | imm16 | imm32 => { if extension.is_sign_extended() { let variant = extension.generate_variant(); @@ -67,7 +71,7 @@ impl dsl::Location { fn generate_size(&self) -> Option<&str> { use dsl::Location::*; match self { - al | ax | eax | rax | cl | imm8 | imm16 | imm32 => None, + al | ax | eax | rax | cl | dx | edx | rdx | imm8 | imm16 | imm32 => None, r8 | rm8 => Some("Size::Byte"), r16 | rm16 => Some("Size::Word"), r32 | rm32 => Some("Size::Doubleword"), diff --git a/cranelift/assembler-x64/meta/src/instructions.rs b/cranelift/assembler-x64/meta/src/instructions.rs index ab5048c42c30..82901917f48a 100644 --- a/cranelift/assembler-x64/meta/src/instructions.rs +++ b/cranelift/assembler-x64/meta/src/instructions.rs @@ -3,6 +3,7 @@ mod add; mod and; mod cvt; +mod mul; mod neg; mod or; mod shift; @@ -17,6 +18,7 @@ pub fn list() -> Vec { all.extend(add::list()); all.extend(and::list()); all.extend(cvt::list()); + all.extend(mul::list()); all.extend(neg::list()); all.extend(or::list()); all.extend(shift::list()); diff --git a/cranelift/assembler-x64/meta/src/instructions/mul.rs b/cranelift/assembler-x64/meta/src/instructions/mul.rs new file mode 100644 index 000000000000..c379a2c28a3b --- /dev/null +++ b/cranelift/assembler-x64/meta/src/instructions/mul.rs @@ -0,0 +1,39 @@ +use crate::dsl::{Feature::*, Inst, Location::*}; +use crate::dsl::{align, fmt, implicit, inst, r, rex, rw, sxl, sxq, sxw, w}; + +#[rustfmt::skip] // Keeps instructions on a single line. +pub fn list() -> Vec { + vec![ + // Multiply unsigned; low bits in `rax`, high bits in `rdx`. + inst("mulb", fmt("M", [rw(implicit(ax)), r(rm8)]), rex(0xF6).digit(4), _64b | compat), + inst("mulw", fmt("M", [rw(implicit(ax)), w(implicit(dx)), r(rm16)]), rex([0x66, 0xF7]).digit(4), _64b | compat), + inst("mull", fmt("M", [rw(implicit(eax)), w(implicit(edx)), r(rm32)]), rex(0xF7).digit(4), _64b | compat), + inst("mulq", fmt("M", [rw(implicit(rax)), w(implicit(rdx)), r(rm64)]), rex(0xF7).w().digit(4), _64b), + // Multiply signed; low bits in `rax`, high bits in `rdx`. + inst("imulb", fmt("M", [rw(implicit(ax)), r(rm8)]), rex(0xF6).digit(5), _64b | compat), + inst("imulw", fmt("M", [rw(implicit(ax)), w(implicit(dx)), r(rm16)]), rex([0x66, 0xF7]).digit(5), _64b | compat), + inst("imull", fmt("M", [rw(implicit(eax)), w(implicit(edx)), r(rm32)]), rex(0xF7).digit(5), _64b | compat), + inst("imulq", fmt("M", [rw(implicit(rax)), w(implicit(rdx)), r(rm64)]), rex(0xF7).w().digit(5), _64b), + inst("imulw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x0F, 0xAF]), _64b | compat), + inst("imull", fmt("RM", [rw(r32), r(rm32)]), rex([0x0F, 0xAF]), _64b | compat), + inst("imulq", fmt("RM", [rw(r64), r(rm64)]), rex([0x0F, 0xAF]).w(), _64b), + inst("imulw", fmt("RMI_SXB", [w(r16), r(rm16), sxw(imm8)]), rex([0x66, 0x6B]).ib(), _64b | compat), + inst("imull", fmt("RMI_SXB", [w(r32), r(rm32), sxl(imm8)]), rex(0x6B).ib(), _64b | compat), + inst("imulq", fmt("RMI_SXB", [w(r64), r(rm64), sxq(imm8)]), rex(0x6B).w().ib(), _64b), + inst("imulw", fmt("RMI", [w(r16), r(rm16), r(imm16)]), rex([0x66, 0x69]).iw(), _64b | compat), + inst("imull", fmt("RMI", [w(r32), r(rm32), r(imm32)]), rex(0x69).id(), _64b | compat), + inst("imulq", fmt("RMI_SXL", [w(r64), r(rm64), sxq(imm32)]), rex(0x69).w().id(), _64b), + // Vector instructions. + inst("mulss", fmt("A", [rw(xmm), r(xmm_m32)]), rex([0xF3, 0x0F, 0x59]).r(), _64b | compat | sse), + inst("mulsd", fmt("A", [rw(xmm), r(xmm_m64)]), rex([0xF2, 0x0F, 0x59]).r(), _64b | compat | sse2), + inst("mulps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x59]).r(), _64b | compat | sse), + inst("mulpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x59]).r(), _64b | compat | sse2), + inst("pmuldq", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x28]).r(), _64b | compat | sse41), + inst("pmulhrsw", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x0B]).r(), _64b | compat | ssse3), + inst("pmulhuw", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xE4]).r(), _64b | compat | sse2), + inst("pmulhw", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xE5]).r(), _64b | compat | sse2), + inst("pmulld", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x40]).r(), _64b | compat | sse41), + inst("pmullw", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xD5]).r(), _64b | compat | sse2), + inst("pmuludq", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF4]).r(), _64b | compat | sse2), + ] +} diff --git a/cranelift/assembler-x64/src/fuzz.rs b/cranelift/assembler-x64/src/fuzz.rs index c8800d321dd8..05de086353f1 100644 --- a/cranelift/assembler-x64/src/fuzz.rs +++ b/cranelift/assembler-x64/src/fuzz.rs @@ -27,7 +27,7 @@ pub fn roundtrip(inst: &Inst) { // off the instruction offset first. let expected = expected.split_once(' ').unwrap().1; let actual = inst.to_string(); - if expected != actual && expected != replace_signed_immediates(&actual) { + if expected != actual && expected != fix_up(&actual) { println!("> {inst}"); println!(" debug: {inst:x?}"); println!(" assembled: {}", pretty_print_hexadecimal(&assembled)); @@ -166,6 +166,33 @@ fn replace() { ); } +/// Remove everything after the first semicolon in the disassembly and trim any +/// trailing spaces. This is necessary to remove the implicit operands we end up +/// printing for Cranelift's sake. +fn remove_after_semicolon(dis: &str) -> &str { + match dis.find(';') { + None => dis, + Some(idx) => { + let (prefix, _) = dis.split_at(idx); + prefix.trim() + } + } +} + +#[test] +fn remove_after_parenthesis_test() { + assert_eq!( + remove_after_semicolon("imulb 0x7658eddd(%rcx) ;; implicit: %ax"), + "imulb 0x7658eddd(%rcx)" + ); +} + +/// Run some post-processing on the disassembly to make it match Capstone. +fn fix_up(dis: &str) -> std::borrow::Cow { + let dis = remove_after_semicolon(dis); + replace_signed_immediates(&dis) +} + /// Fuzz-specific registers. /// /// For the fuzzer, we do not need any fancy register types; see [`FuzzReg`]. diff --git a/cranelift/codegen/meta/src/gen_asm.rs b/cranelift/codegen/meta/src/gen_asm.rs index d3e3c83d34de..193c866e212d 100644 --- a/cranelift/codegen/meta/src/gen_asm.rs +++ b/cranelift/codegen/meta/src/gen_asm.rs @@ -1,7 +1,7 @@ //! Generate the Cranelift-specific integration of the x64 assembler. use cranelift_assembler_x64_meta::dsl::{ - Format, Inst, Mutability, Operand, OperandKind, format::RegClass, + Format, Inst, Location, Mutability, Operand, OperandKind, RegClass, }; use cranelift_srcgen::{Formatter, fmtln}; @@ -89,10 +89,8 @@ pub fn rust_convert_isle_to_assembler(op: &Operand) -> String { /// This function panics if the instruction has no operands. pub fn generate_macro_inst_fn(f: &mut Formatter, inst: &Inst) { let struct_name = inst.name(); - let operands = inst.format.operands.iter().collect::>(); - let results = inst - .format - .operands + let operands = inst.format.operands.iter().cloned().collect::>(); + let results = operands .iter() .filter(|o| o.mutability.is_write()) .collect::>(); @@ -105,6 +103,7 @@ pub fn generate_macro_inst_fn(f: &mut Formatter, inst: &Inst) { f.add_block( &format!("fn x64_{struct_name}_raw(&mut self, {rust_params}) -> AssemblerOutputs"), |f| { + f.comment("Convert ISLE types to assembler types."); for op in operands.iter() { let loc = op.location; let cvt = rust_convert_isle_to_assembler(op); @@ -115,60 +114,69 @@ pub fn generate_macro_inst_fn(f: &mut Formatter, inst: &Inst) { .map(|o| format!("{}.clone()", o.location)) .collect::>(); let args = args.join(", "); + f.empty_line(); + + f.comment("Build the instruction."); fmtln!( f, "let inst = {ASM}::inst::{struct_name}::new({args}).into();" ); fmtln!(f, "let inst = MInst::External {{ inst }};"); + f.empty_line(); - use cranelift_assembler_x64_meta::dsl::Mutability::*; + // When an instruction writes to an operand, Cranelift expects a + // returned value to use in other instructions: we return this + // information in the `AssemblerOutputs` struct defined in ISLE + // (below). The general rule here is that memory stores will create + // a `SideEffect` whereas for write or read-write registers we will + // return some form of `Ret*`. + f.comment("Return a type ISLE can work with."); + let access_reg = |op: &Operand| match op.mutability { + Mutability::Read => unreachable!(), + Mutability::Write => "to_reg()", + Mutability::ReadWrite => "write.to_reg()", + }; + let ty_var_of_reg = |loc: Location| { + let ty = loc.reg_class().unwrap().to_string(); + let var = ty.to_lowercase(); + (ty, var) + }; match results.as_slice() { [] => fmtln!(f, "SideEffectNoResult::Inst(inst)"), - [one] => match one.mutability { - Read => unreachable!(), - Write => match one.location.kind() { - // One write-only register output? Output the - // instruction and that register. - OperandKind::Reg(r) => { - let ty = r.reg_class().unwrap().to_string(); - let var = ty.to_lowercase(); - fmtln!(f, "let {var} = {r}.as_ref().to_reg();"); - fmtln!(f, "AssemblerOutputs::Ret{ty} {{ inst, {var} }}"); - } - _ => unimplemented!(), - }, - ReadWrite => match one.location.kind() { - OperandKind::Imm(_) => unreachable!(), - // One read/write register output? Output the instruction - // and that register. - OperandKind::Reg(r) | OperandKind::FixedReg(r) => { - let ty = r.reg_class().unwrap().to_string(); - let var = ty.to_lowercase(); - fmtln!(f, "let {var} = {r}.as_ref().write.to_reg();",); - fmtln!(f, "AssemblerOutputs::Ret{ty} {{ inst, {var} }}"); - } - // One read/write memory operand? Output a side effect. - OperandKind::Mem(_) => { - fmtln!(f, "AssemblerOutputs::SideEffect {{ inst }}") - } - // One read/write regmem output? We need to output - // everything and it'll internally disambiguate which was - // emitted (e.g. the mem variant or the register variant). - OperandKind::RegMem(rm) => { - assert_eq!(results.len(), 1); - let ty = rm.reg_class().unwrap().to_string(); - let var = ty.to_lowercase(); - f.add_block(&format!("match {rm}"), |f| { - f.add_block(&format!("asm::{ty}Mem::{ty}(reg) => "), |f| { - fmtln!(f, "let {var} = reg.write.to_reg();"); - fmtln!(f, "AssemblerOutputs::Ret{ty} {{ inst, {var} }} "); - }); - f.add_block(&format!("asm::{ty}Mem::Mem(_) => "), |f| { - fmtln!(f, "AssemblerOutputs::SideEffect {{ inst }} "); - }); + [op] => match op.location.kind() { + OperandKind::Imm(_) => unreachable!(), + OperandKind::Reg(r) | OperandKind::FixedReg(r) => { + let (ty, var) = ty_var_of_reg(r); + fmtln!(f, "let {var} = {r}.as_ref().{};", access_reg(op)); + fmtln!(f, "AssemblerOutputs::Ret{ty} {{ inst, {var} }}"); + } + OperandKind::Mem(_) => { + fmtln!(f, "AssemblerOutputs::SideEffect {{ inst }}") + } + OperandKind::RegMem(rm) => { + let (ty, var) = ty_var_of_reg(rm); + f.add_block(&format!("match {rm}"), |f| { + f.add_block(&format!("{ASM}::{ty}Mem::{ty}(reg) => "), |f| { + fmtln!(f, "let {var} = reg.{};", access_reg(op)); + fmtln!(f, "AssemblerOutputs::Ret{ty} {{ inst, {var} }} "); + }); + f.add_block(&format!("{ASM}::{ty}Mem::Mem(_) => "), |f| { + fmtln!(f, "AssemblerOutputs::SideEffect {{ inst }} "); }); - } - }, + }); + } + }, + // For now, we assume that if there are two results, they are + // coming from a register-writing instruction like `mul`. The + // `match` below can be expanded as needed. + [op1, op2] => match (op1.location.kind(), op2.location.kind()) { + (OperandKind::FixedReg(loc1), OperandKind::FixedReg(loc2)) => { + fmtln!(f, "let one = {loc1}.as_ref().{}.to_reg();", access_reg(op1)); + fmtln!(f, "let two = {loc2}.as_ref().{}.to_reg();", access_reg(op2)); + fmtln!(f, "let regs = ValueRegs::two(one, two);"); + fmtln!(f, "AssemblerOutputs::RetValueRegs {{ inst, regs }}"); + } + _ => unimplemented!("unhandled results: {results:?}"), }, _ => panic!("instruction has more than one result"), } @@ -234,13 +242,16 @@ pub enum IsleConstructor { /// a result in memory, however. RetMemorySideEffect, - /// This constructor produces a `Gpr` value, meaning that it will write the - /// result to a `Gpr`. + /// This constructor produces a `Gpr` value, meaning that the instruction + /// will write its result to a single GPR register. RetGpr, - /// This constructor produces an `Xmm` value, meaning that it will write the - /// result to an `Xmm`. + /// This is similar to `RetGpr`, but for XMM registers. RetXmm, + + /// This "special" constructor captures multiple written-to registers (e.g. + /// `mul`). + RetValueRegs, } impl IsleConstructor { @@ -250,6 +261,7 @@ impl IsleConstructor { IsleConstructor::RetMemorySideEffect => "SideEffectNoResult", IsleConstructor::RetGpr => "Gpr", IsleConstructor::RetXmm => "Xmm", + IsleConstructor::RetValueRegs => "ValueRegs", } } @@ -260,6 +272,7 @@ impl IsleConstructor { IsleConstructor::RetMemorySideEffect => "defer_side_effect", IsleConstructor::RetGpr => "emit_ret_gpr", IsleConstructor::RetXmm => "emit_ret_xmm", + IsleConstructor::RetValueRegs => "emit_ret_value_regs", } } @@ -267,8 +280,7 @@ impl IsleConstructor { pub fn suffix(&self) -> &'static str { match self { IsleConstructor::RetMemorySideEffect => "_mem", - IsleConstructor::RetGpr => "", - IsleConstructor::RetXmm => "", + IsleConstructor::RetGpr | IsleConstructor::RetXmm | IsleConstructor::RetValueRegs => "", } } } @@ -285,6 +297,7 @@ pub fn isle_param_for_ctor(op: &Operand, ctor: IsleConstructor) -> String { IsleConstructor::RetMemorySideEffect => "Amode".to_string(), IsleConstructor::RetGpr => "Gpr".to_string(), IsleConstructor::RetXmm => "Xmm".to_string(), + IsleConstructor::RetValueRegs => "ValueRegs".to_string(), }, // everything else is the same as the "raw" variant @@ -336,6 +349,14 @@ pub fn isle_constructors(format: &Format) -> Vec { }, }, }, + [one, two] => { + // For now, we assume that if there are two results, they are coming + // from a register-writing instruction like `mul`. This can be + // expanded as needed. + assert!(matches!(one.location.kind(), FixedReg(_))); + assert!(matches!(two.location.kind(), FixedReg(_))); + vec![IsleConstructor::RetValueRegs] + } other => panic!("unsupported number of write operands {other:?}"), } } @@ -435,8 +456,8 @@ pub fn generate_isle(f: &mut Formatter, insts: &[Inst]) { fmtln!(f, " ;; Used for instructions that return an"); fmtln!(f, " ;; XMM register."); fmtln!(f, " (RetXmm (inst MInst) (xmm Xmm))"); - fmtln!(f, " ;; TODO: eventually add more variants for"); - fmtln!(f, " ;; multi-return, XMM, etc.; see"); + fmtln!(f, " ;; Used for multi-return instructions."); + fmtln!(f, " (RetValueRegs (inst MInst) (regs ValueRegs))"); fmtln!( f, " ;; https://github.com/bytecodealliance/wasmtime/pull/10276" @@ -457,6 +478,16 @@ pub fn generate_isle(f: &mut Formatter, insts: &[Inst]) { fmtln!(f, " (let ((_ Unit (emit inst))) xmm))"); f.empty_line(); + fmtln!(f, ";; Directly emit instructions that return multiple"); + fmtln!(f, ";; registers (e.g. `mul`)."); + fmtln!(f, "(decl emit_ret_value_regs (AssemblerOutputs) ValueRegs)"); + fmtln!( + f, + "(rule (emit_ret_value_regs (AssemblerOutputs.RetValueRegs inst regs))" + ); + fmtln!(f, " (let ((_ Unit (emit inst))) regs))"); + f.empty_line(); + fmtln!(f, ";; Pass along the side-effecting instruction"); fmtln!(f, ";; for later emission."); fmtln!( diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 29e1c1d01053..666afe10a74c 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -62,15 +62,6 @@ (dividend Gpr) (dst WritableGpr)) - ;; Unsigned multiplication producing the high bits of the result in one - ;; register and the low bits in another register. - (Mul (size OperandSize) - (signed bool) - (src1 Gpr) - (src2 GprMem) - (dst_lo WritableGpr) - (dst_hi WritableGpr)) - ;; Same as `Mul`, but for the BMI2 `mulx` instruction. This is different ;; where the two `dst_*` registers can be arbitrary registers and it ;; is always unsigned multiplication. Note that this instruction does @@ -86,26 +77,6 @@ (dst_lo WritableGpr) (dst_hi WritableGpr)) - ;; Same as `Mul` but the 16-bit multiplication result is stored in `AX`. - (Mul8 (signed bool) - (src1 Gpr) - (src2 GprMem) - (dst WritableGpr)) - - ;; The two-operand form of `imul` which produces a truncated same-size - ;; result as the operands. - (IMul (size OperandSize) - (src1 Gpr) - (src2 GprMem) - (dst WritableGpr)) - - ;; The three-operand form of `imul` where the third operand must be - ;; a constant. - (IMulImm (size OperandSize) - (src1 GprMem) - (src2 i32) - (dst WritableGpr)) - ;; A synthetic instruction sequence used as part of the lowering of the ;; `srem` instruction which returns 0 if the divisor is -1 and ;; otherwise executes an `idiv` instruction. @@ -2693,6 +2664,8 @@ (decl asm_produce_flags (AssemblerOutputs) ProducesFlags) (rule (asm_produce_flags (AssemblerOutputs.RetGpr inst gpr)) (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst gpr)) +(rule (asm_produce_flags (AssemblerOutputs.RetValueRegs inst regs)) + (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst (value_regs_get_gpr regs 0))) ;; Other operations consume _and_ produce flags--"chaining". (type ChainFlagsOp (enum (Adc) (Sbb))) @@ -2883,15 +2856,18 @@ ;; Helper for creating `mul` instructions or `imul` instructions (depending -;; on `signed`) -(decl x64_mul (Type bool Gpr GprMem) ValueRegs) -(rule (x64_mul ty signed src1 src2) - (let ((dst_lo WritableGpr (temp_writable_gpr)) - (dst_hi WritableGpr (temp_writable_gpr)) - (size OperandSize (raw_operand_size_of_type ty)) - (_ Unit (emit (MInst.Mul size signed src1 src2 dst_lo dst_hi)))) - (value_gprs dst_lo dst_hi))) +;; on `signed`). For the 8-bit rules, see `x64_mul8`. +(decl x64_mul_raw (Type bool Gpr GprMem) AssemblerOutputs) +(rule (x64_mul_raw $I16 false src1 src2) (x64_mulw_m_raw src1 src2)) +(rule (x64_mul_raw $I32 false src1 src2) (x64_mull_m_raw src1 src2)) +(rule (x64_mul_raw $I64 false src1 src2) (x64_mulq_m_raw src1 src2)) +(rule (x64_mul_raw $I16 true src1 src2) (x64_imulw_m_raw src1 src2)) +(rule (x64_mul_raw $I32 true src1 src2) (x64_imull_m_raw src1 src2)) +(rule (x64_mul_raw $I64 true src1 src2) (x64_imulq_m_raw src1 src2)) +(decl x64_mul (Type bool Gpr GprMem) ValueRegs) +(rule 0 (x64_mul ty signed src1 src2) + (emit_ret_value_regs (x64_mul_raw ty signed src1 src2))) ;; Special case the `mulx` pattern with the BMI2 instruction set. (rule 1 (x64_mul (ty_32_or_64 ty) false src1 src2) (if-let true (use_bmi2)) @@ -2908,49 +2884,43 @@ (_ Unit (emit (MInst.MulX size src1 src2 (writable_invalid_gpr) dst)))) dst)) +(decl x64_mul_lo_with_flags_paired (Type bool Gpr GprMem) ProducesFlags) +(rule (x64_mul_lo_with_flags_paired ty signed src1 src2) + (asm_produce_flags (x64_mul_raw ty signed src1 src2))) + ;; Get the invalid register as writable (decl writable_invalid_gpr () WritableGpr) (extern constructor writable_invalid_gpr writable_invalid_gpr) -;; Helper for creating `mul` instructions or `imul` instructions (depending -;; on `signed`) for 8-bit operands. -(decl x64_mul8 (bool Gpr GprMem) Gpr) -(rule (x64_mul8 signed src1 src2) - (let ((dst WritableGpr (temp_writable_gpr)) - (_ Unit (emit (MInst.Mul8 signed src1 src2 dst)))) - dst)) - ;; Helper for creating `imul` instructions. (decl x64_imul (Type Gpr GprMem) Gpr) -(rule (x64_imul ty src1 src2) - (let ((dst WritableGpr (temp_writable_gpr)) - (size OperandSize (raw_operand_size_of_type ty)) - (_ Unit (emit (MInst.IMul size src1 src2 dst)))) - dst)) +(rule (x64_imul $I16 src1 src2) (x64_imulw_rm src1 src2)) +(rule (x64_imul $I32 src1 src2) (x64_imull_rm src1 src2)) +(rule (x64_imul $I64 src1 src2) (x64_imulq_rm src1 src2)) -;; Helper for creating `imul` instructions with an immediate operand. +;; Helper for creating `imul` instructions with an immediate operand. Match +;; 8-bit immediates first to allow a smaller instruction encoding. (decl x64_imul_imm (Type GprMem i32) Gpr) -(rule (x64_imul_imm ty src1 src2) - (let ((dst WritableGpr (temp_writable_gpr)) - (size OperandSize (raw_operand_size_of_type ty)) - (_ Unit (emit (MInst.IMulImm size src1 src2 dst)))) - dst)) +(rule 2 (x64_imul_imm $I16 src1 (i8_try_from_i32 src2)) (x64_imulw_rmi_sxb src1 src2)) +(rule 2 (x64_imul_imm $I32 src1 (i8_try_from_i32 src2)) (x64_imull_rmi_sxb src1 src2)) +(rule 2 (x64_imul_imm $I64 src1 (i8_try_from_i32 src2)) (x64_imulq_rmi_sxb src1 src2)) +(rule 1 (x64_imul_imm $I16 src1 (u16_try_from_i32 src2)) (x64_imulw_rmi src1 src2)) +(rule 1 (x64_imul_imm $I32 src1 (i32_as_u32 src2)) (x64_imull_rmi src1 src2)) +(rule 1 (x64_imul_imm $I64 src1 src2) (x64_imulq_rmi_sxl src1 src2)) + +;; Helper for creating `mul` instructions or `imul` instructions (depending +;; on `signed`) for 8-bit operands. +(decl x64_mul8_raw (bool Gpr GprMem) AssemblerOutputs) +(rule (x64_mul8_raw false src1 src2) (x64_mulb_m_raw src1 src2)) +(rule (x64_mul8_raw true src1 src2) (x64_imulb_m_raw src1 src2)) + +(decl x64_mul8 (bool Gpr GprMem) Gpr) +(rule (x64_mul8 signed src1 src2) + (emit_ret_gpr (x64_mul8_raw signed src1 src2))) (decl x64_mul8_with_flags_paired (bool Gpr GprMem) ProducesFlags) (rule (x64_mul8_with_flags_paired signed src1 src2) - (let ((dst WritableGpr (temp_writable_gpr))) - (ProducesFlags.ProducesFlagsReturnsResultWithConsumer - (MInst.Mul8 signed src1 src2 dst) - dst))) - -(decl x64_mul_lo_with_flags_paired (Type bool Gpr GprMem) ProducesFlags) -(rule (x64_mul_lo_with_flags_paired ty signed src1 src2) - (let ((dst_lo WritableGpr (temp_writable_gpr)) - (dst_hi WritableGpr (temp_writable_gpr)) - (size OperandSize (raw_operand_size_of_type ty))) - (ProducesFlags.ProducesFlagsReturnsResultWithConsumer - (MInst.Mul size signed src1 src2 dst_lo dst_hi) - dst_lo))) + (asm_produce_flags (x64_mul8_raw signed src1 src2))) @@ -3587,59 +3557,52 @@ ;; Helper for creating `pmullw` instructions. (decl x64_pmullw (Xmm XmmMem) Xmm) -(rule 0 (x64_pmullw src1 src2) - (xmm_rm_r (SseOpcode.Pmullw) src1 src2)) (rule 1 (x64_pmullw src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpmullw) src1 src2)) +(rule 0 (x64_pmullw src1 src2) (x64_pmullw_a src1 src2)) ;; Helper for creating `pmulld` instructions. (decl x64_pmulld (Xmm XmmMem) Xmm) -(rule 0 (x64_pmulld src1 src2) - (xmm_rm_r (SseOpcode.Pmulld) src1 src2)) (rule 1 (x64_pmulld src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpmulld) src1 src2)) +(rule 0 (x64_pmulld src1 src2) (x64_pmulld_a src1 src2)) ;; Helper for creating `pmulhw` instructions. (decl x64_pmulhw (Xmm XmmMem) Xmm) -(rule 0 (x64_pmulhw src1 src2) - (xmm_rm_r (SseOpcode.Pmulhw) src1 src2)) (rule 1 (x64_pmulhw src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpmulhw) src1 src2)) +(rule 0 (x64_pmulhw src1 src2) (x64_pmulhw_a src1 src2)) ;; Helper for creating `pmulhrsw` instructions. (decl x64_pmulhrsw (Xmm XmmMem) Xmm) -(rule 0 (x64_pmulhrsw src1 src2) - (xmm_rm_r (SseOpcode.Pmulhrsw) src1 src2)) (rule 1 (x64_pmulhrsw src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpmulhrsw) src1 src2)) +(rule 0 (x64_pmulhrsw src1 src2) (x64_pmulhrsw_a src1 src2)) ;; Helper for creating `pmulhuw` instructions. (decl x64_pmulhuw (Xmm XmmMem) Xmm) -(rule 0 (x64_pmulhuw src1 src2) - (xmm_rm_r (SseOpcode.Pmulhuw) src1 src2)) (rule 1 (x64_pmulhuw src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpmulhuw) src1 src2)) +(rule 0 (x64_pmulhuw src1 src2) (x64_pmulhuw_a src1 src2)) ;; Helper for creating `pmuldq` instructions. (decl x64_pmuldq (Xmm XmmMem) Xmm) -(rule 0 (x64_pmuldq src1 src2) - (xmm_rm_r (SseOpcode.Pmuldq) src1 src2)) (rule 1 (x64_pmuldq src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpmuldq) src1 src2)) +(rule 0 (x64_pmuldq src1 src2) (x64_pmuldq_a src1 src2)) ;; Helper for creating `pmuludq` instructions. (decl x64_pmuludq (Xmm XmmMem) Xmm) -(rule 0 (x64_pmuludq src1 src2) - (xmm_rm_r (SseOpcode.Pmuludq) src1 src2)) (rule 1 (x64_pmuludq src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpmuludq) src1 src2)) +(rule 0 (x64_pmuludq src1 src2) (x64_pmuludq_a src1 src2)) ;; Helper for creating `punpckhwd` instructions. (decl x64_punpckhwd (Xmm XmmMem) Xmm) @@ -3792,35 +3755,31 @@ ;; Helper for creating `mulss` instructions. (decl x64_mulss (Xmm XmmMem) Xmm) -(rule (x64_mulss src1 src2) - (xmm_rm_r_unaligned (SseOpcode.Mulss) src1 src2)) (rule 1 (x64_mulss src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vmulss) src1 src2)) +(rule 0 (x64_mulss src1 src2) (x64_mulss_a src1 src2)) ;; Helper for creating `mulsd` instructions. (decl x64_mulsd (Xmm XmmMem) Xmm) -(rule (x64_mulsd src1 src2) - (xmm_rm_r_unaligned (SseOpcode.Mulsd) src1 src2)) (rule 1 (x64_mulsd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vmulsd) src1 src2)) +(rule 0 (x64_mulsd src1 src2) (x64_mulsd_a src1 src2)) ;; Helper for creating `mulps` instructions. (decl x64_mulps (Xmm XmmMem) Xmm) -(rule 0 (x64_mulps src1 src2) - (xmm_rm_r (SseOpcode.Mulps) src1 src2)) (rule 1 (x64_mulps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vmulps) src1 src2)) +(rule 0 (x64_mulps src1 src2) (x64_mulps_a src1 src2)) ;; Helper for creating `mulpd` instructions. (decl x64_mulpd (Xmm XmmMem) Xmm) -(rule (x64_mulpd src1 src2) - (xmm_rm_r (SseOpcode.Mulpd) src1 src2)) (rule 1 (x64_mulpd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vmulpd) src1 src2)) +(rule 0 (x64_mulpd src1 src2) (x64_mulpd_a src1 src2)) ;; Helper for creating `divss` instructions. (decl x64_divss (Xmm XmmMem) Xmm) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 7807b805a037..f28ddc444a73 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -366,142 +366,6 @@ pub(crate) fn emit( } } - Inst::Mul { - signed, - size, - src1, - src2, - dst_lo, - dst_hi, - } => { - let src1 = src1.to_reg(); - let dst_lo = dst_lo.to_reg().to_reg(); - let dst_hi = dst_hi.to_reg().to_reg(); - debug_assert_eq!(src1, regs::rax()); - debug_assert_eq!(dst_lo, regs::rax()); - debug_assert_eq!(dst_hi, regs::rdx()); - let src2 = src2.clone().to_reg_mem().clone(); - - let rex_flags = RexFlags::from(*size); - let prefix = match size { - OperandSize::Size16 => LegacyPrefixes::_66, - OperandSize::Size32 => LegacyPrefixes::None, - OperandSize::Size64 => LegacyPrefixes::None, - _ => unreachable!(), - }; - - let subopcode = if *signed { 5 } else { 4 }; - match src2 { - RegMem::Reg { reg } => { - let src = int_reg_enc(reg); - emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags) - } - RegMem::Mem { addr: src } => { - let amode = src.finalize(state.frame_layout(), sink); - emit_std_enc_mem(sink, prefix, 0xF7, 1, subopcode, &amode, rex_flags, 0); - } - } - } - Inst::Mul8 { - signed, - src1, - src2, - dst, - } => { - let src1 = src1.to_reg(); - let dst = dst.to_reg().to_reg(); - debug_assert_eq!(src1, regs::rax()); - debug_assert_eq!(dst, regs::rax()); - let src2 = src2.clone().to_reg_mem().clone(); - - let mut rex_flags = RexFlags::from(OperandSize::Size8); - let prefix = LegacyPrefixes::None; - let subopcode = if *signed { 5 } else { 4 }; - match src2 { - RegMem::Reg { reg } => { - // The intel manual states: - // - // > r/m8 can not be encoded to access the following byte - // > registers if a REX prefix is used: AH, BH, CH, DH - // - // And apparently that also means that a REX prefix must be - // used if it's not one of those registers. - if !(reg == regs::rax() - || reg == regs::rbx() - || reg == regs::rcx() - || reg == regs::rdx()) - { - rex_flags.always_emit(); - } - let src = int_reg_enc(reg); - emit_std_enc_enc(sink, prefix, 0xF6, 1, subopcode, src, rex_flags) - } - RegMem::Mem { addr } => { - let amode = addr.finalize(state.frame_layout(), sink); - emit_std_enc_mem(sink, prefix, 0xF6, 1, subopcode, &amode, rex_flags, 0); - } - } - } - Inst::IMul { - size, - src1, - src2, - dst, - } => { - let src1 = src1.to_reg(); - let dst = dst.to_reg().to_reg(); - debug_assert_eq!(src1, dst); - let src2 = src2.clone().to_reg_mem().clone(); - - let rex = RexFlags::from(*size); - let prefix = LegacyPrefixes::None; - match src2 { - RegMem::Reg { reg } => { - emit_std_reg_reg(sink, prefix, 0x0FAF, 2, dst, reg, rex); - } - - RegMem::Mem { addr } => { - let amode = addr.finalize(state.frame_layout(), sink); - emit_std_reg_mem(sink, prefix, 0x0FAF, 2, dst, &amode, rex, 0); - } - } - } - - Inst::IMulImm { - size, - src1, - src2, - dst, - } => { - let dst = dst.to_reg().to_reg(); - let src1 = src1.clone().to_reg_mem().clone(); - - let rex = RexFlags::from(*size); - let prefix = match size { - // NB: the intel manual doesn't seem to mention this prefix as - // being required - OperandSize::Size16 => LegacyPrefixes::_66, - _ => LegacyPrefixes::None, - }; - let imm_size = if i8::try_from(*src2).is_ok() { - 1 - } else { - if *size == OperandSize::Size16 { 2 } else { 4 } - }; - let opcode = if imm_size == 1 { 0x6B } else { 0x69 }; - match src1 { - RegMem::Reg { reg } => { - emit_std_reg_reg(sink, prefix, opcode, 1, dst, reg, rex); - } - - RegMem::Mem { addr } => { - let amode = addr.finalize(state.frame_layout(), sink); - emit_std_reg_mem(sink, prefix, opcode, 1, dst, &amode, rex, imm_size); - } - } - emit_simm(sink, imm_size, *src2 as u32); - } - Inst::MulX { size, src1, diff --git a/cranelift/codegen/src/isa/x64/inst/external.rs b/cranelift/codegen/src/isa/x64/inst/external.rs index d2b2e09dcb3e..a7ec4c18dd90 100644 --- a/cranelift/codegen/src/isa/x64/inst/external.rs +++ b/cranelift/codegen/src/isa/x64/inst/external.rs @@ -112,6 +112,13 @@ impl From for asm::GprMem { } } +// For Winch ergonomics. +impl From for asm::Gpr { + fn from(gpr: WritableGpr) -> Self { + Self::new(gpr) + } +} + impl asm::AsReg for PairedGpr { fn enc(&self) -> u8 { let PairedGpr { read, write } = self; diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 8f7cf8cfc367..2aec0e5969b2 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -59,7 +59,7 @@ pub struct ReturnCallInfo { fn inst_size_test() { // This test will help with unintentionally growing the size // of the Inst enum. - assert_eq!(48, std::mem::size_of::()); + assert_eq!(56, std::mem::size_of::()); } pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool { @@ -113,10 +113,6 @@ impl Inst { | Inst::MovToPReg { .. } | Inst::MovsxRmR { .. } | Inst::MovzxRmR { .. } - | Inst::Mul { .. } - | Inst::Mul8 { .. } - | Inst::IMul { .. } - | Inst::IMulImm { .. } | Inst::Nop { .. } | Inst::Pop64 { .. } | Inst::Push64 { .. } @@ -197,6 +193,7 @@ impl Inst { sse => features.push(InstructionSet::SSE), sse2 => features.push(InstructionSet::SSE2), ssse3 => features.push(InstructionSet::SSSE3), + sse41 => features.push(InstructionSet::SSE41), } } features @@ -775,27 +772,6 @@ impl PrettyPrint for Inst { format!("{op} {dividend}, {divisor}, {dst} ; trap={trap}") } - Inst::Mul { - size, - signed, - src1, - src2, - dst_lo, - dst_hi, - } => { - let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes()); - let dst_lo = pretty_print_reg(dst_lo.to_reg().to_reg(), size.to_bytes()); - let dst_hi = pretty_print_reg(dst_hi.to_reg().to_reg(), size.to_bytes()); - let src2 = src2.pretty_print(size.to_bytes()); - let suffix = suffix_bwlq(*size); - let op = ljustify(if *signed { - format!("imul{suffix}") - } else { - format!("mul{suffix}") - }); - format!("{op} {src1}, {src2}, {dst_lo}, {dst_hi}") - } - Inst::MulX { size, src1, @@ -816,50 +792,6 @@ impl PrettyPrint for Inst { format!("{op} {src1}, {src2}, {dst_lo}, {dst_hi}") } - Inst::Mul8 { - signed, - src1, - src2, - dst, - } => { - let src1 = pretty_print_reg(src1.to_reg(), 1); - let dst = pretty_print_reg(dst.to_reg().to_reg(), 1); - let src2 = src2.pretty_print(1); - let op = ljustify(if *signed { - "imulb".to_string() - } else { - "mulb".to_string() - }); - format!("{op} {src1}, {src2}, {dst}") - } - - Inst::IMul { - size, - src1, - src2, - dst, - } => { - let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes()); - let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes()); - let src2 = src2.pretty_print(size.to_bytes()); - let suffix = suffix_bwlq(*size); - let op = ljustify(format!("imul{suffix}")); - format!("{op} {src1}, {src2}, {dst}") - } - - Inst::IMulImm { - size, - src1, - src2, - dst, - } => { - let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes()); - let src1 = src1.pretty_print(size.to_bytes()); - let suffix = suffix_bwlq(*size); - let op = ljustify(format!("imul{suffix}")); - format!("{op} {src1}, {src2:#x}, {dst}") - } - Inst::CheckedSRemSeq { size, divisor, @@ -2011,36 +1943,6 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_fixed_use(dividend, regs::rax()); collector.reg_fixed_def(dst, regs::rax()); } - Inst::Mul { - src1, - src2, - dst_lo, - dst_hi, - .. - } => { - collector.reg_fixed_use(src1, regs::rax()); - collector.reg_fixed_def(dst_lo, regs::rax()); - collector.reg_fixed_def(dst_hi, regs::rdx()); - src2.get_operands(collector); - } - Inst::Mul8 { - src1, src2, dst, .. - } => { - collector.reg_fixed_use(src1, regs::rax()); - collector.reg_fixed_def(dst, regs::rax()); - src2.get_operands(collector); - } - Inst::IMul { - src1, src2, dst, .. - } => { - collector.reg_use(src1); - collector.reg_reuse_def(dst, 0); - src2.get_operands(collector); - } - Inst::IMulImm { src1, dst, .. } => { - collector.reg_def(dst); - src1.get_operands(collector); - } Inst::MulX { src1, src2, diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index ea5f0a3da618..d8bdb9038118 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1044,7 +1044,7 @@ (rule -5 (lower (has_type (ty_int_ref_16_to_64 ty) (imul (sinkable_load x) y))) (x64_imul ty y x)) -;; lift out constants to use 3-operand form +;; Lift out constants to use 3-operand form. (rule -4 (lower (has_type (ty_int_ref_16_to_64 ty) (imul x (i32_from_iconst y)))) (x64_imul_imm ty x y)) (rule -3 (lower (has_type (ty_int_ref_16_to_64 ty) (imul (i32_from_iconst x) y))) diff --git a/cranelift/codegen/src/isa/x64/pcc.rs b/cranelift/codegen/src/isa/x64/pcc.rs index 27a074a235ff..f4d9111efa99 100644 --- a/cranelift/codegen/src/isa/x64/pcc.rs +++ b/cranelift/codegen/src/isa/x64/pcc.rs @@ -126,14 +126,7 @@ pub(crate) fn check( undefined_result(ctx, vcode, dst, 64, 64)?; Ok(()) } - Inst::Mul { - size, - dst_lo, - dst_hi, - ref src2, - .. - } - | Inst::MulX { + Inst::MulX { size, dst_lo, dst_hi, @@ -150,46 +143,6 @@ pub(crate) fn check( undefined_result(ctx, vcode, dst_hi, 64, size.to_bits().into())?; Ok(()) } - Inst::Mul8 { dst, ref src2, .. } => { - match <&RegMem>::from(src2) { - RegMem::Mem { addr } => { - check_load(ctx, None, addr, vcode, I8, 64)?; - } - RegMem::Reg { .. } => {} - } - undefined_result(ctx, vcode, dst, 64, 16)?; - Ok(()) - } - Inst::IMul { - size, - dst, - ref src2, - .. - } => { - match <&RegMem>::from(src2) { - RegMem::Mem { addr } => { - check_load(ctx, None, addr, vcode, size.to_type(), 64)?; - } - RegMem::Reg { .. } => {} - } - undefined_result(ctx, vcode, dst, 64, size.to_bits().into())?; - Ok(()) - } - Inst::IMulImm { - size, - dst, - ref src1, - .. - } => { - match <&RegMem>::from(src1) { - RegMem::Mem { addr } => { - check_load(ctx, None, addr, vcode, size.to_type(), 64)?; - } - RegMem::Reg { .. } => {} - } - undefined_result(ctx, vcode, dst, 64, size.to_bits().into())?; - Ok(()) - } Inst::CheckedSRemSeq { dst_quotient, dst_remainder, diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 29bd96e2f2d6..08e05b026229 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -58,6 +58,11 @@ macro_rules! isle_common_prelude_methods { x as i64 } + #[inline] + fn i32_as_u32(&mut self, x: i32) -> Option { + Some(x as u32) + } + #[inline] fn i32_as_i64(&mut self, x: i32) -> i64 { x.into() @@ -933,6 +938,10 @@ macro_rules! isle_common_prelude_methods { u64::try_from(val).ok() } + fn u16_try_from_i32(&mut self, val: i32) -> Option { + u16::try_from(val).ok() + } + fn u16_try_from_u64(&mut self, val: u64) -> Option { u16::try_from(val).ok() } @@ -941,6 +950,10 @@ macro_rules! isle_common_prelude_methods { u32::try_from(val).ok() } + fn i8_try_from_i32(&mut self, val: i32) -> Option { + i8::try_from(val).ok() + } + fn i8_try_from_u64(&mut self, val: u64) -> Option { i8::try_from(val).ok() } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 1734c6881f88..4841c754545a 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -124,6 +124,9 @@ (decl pure partial u8_try_from_i32 (i32) u8) (extern constructor u8_try_from_i32 u8_try_from_i32) +(decl u16_try_from_i32 (u16) i32) +(extern extractor u16_try_from_i32 u16_try_from_i32) + (decl pure partial u16_try_from_u64 (u64) u16) (extern constructor u16_try_from_u64 u16_try_from_u64) @@ -133,6 +136,9 @@ (decl pure partial u64_try_from_i64 (i64) u64) (extern constructor u64_try_from_i64 u64_try_from_i64) +(decl i8_try_from_i32 (i8) i32) +(extern extractor i8_try_from_i32 i8_try_from_i32) + (decl pure partial i8_try_from_u64 (u64) i8) (extern constructor i8_try_from_u64 i8_try_from_u64) @@ -146,6 +152,9 @@ (extern constructor u32_as_u64 u32_as_u64) (convert u32 u64 u32_as_u64) +(decl i32_as_u32 (u32) i32) +(extern extractor i32_as_u32 i32_as_u32) + (decl pure i32_as_i64 (i32) i64) (extern constructor i32_as_i64 i32_as_i64) (convert i32 i64 i32_as_i64) diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index dcf7b3c8d134..b81a39ffcd6e 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -201,13 +201,13 @@ block0(v0: i128, v1: i128): ; block0: ; movq %rdx, %rax ; movq %rdi, %rdx -; imulq %rdx, %rcx, %rdx +; imulq %rcx, %rdx ; movq %rax, %rcx -; imulq %rsi, %rcx, %rsi +; imulq %rcx, %rsi ; addq %rsi, %rdx ; movq %rdi, %rax ; movq %rdx, %r8 -; mulq %rax, %rcx, %rax, %rdx +; mulq %rcx ;; implicit: %rax, %rdx ; movq %rdx, %rcx ; movq %r8, %rdx ; addq %rcx, %rdx @@ -774,7 +774,7 @@ block0(v0: i128): ; movabsq $1085102592571150095, %rdi ; andq %rdi, %rax ; movabsq $72340172838076673, %rdx -; imulq %rax, %rdx, %rax +; imulq %rdx, %rax ; shrq $56, %rax, %rax ; movq %rsi, %rdi ; shrq $1, %rdi, %rdi @@ -793,7 +793,7 @@ block0(v0: i128): ; movabsq $1085102592571150095, %r10 ; andq %r10, %rdi ; movabsq $72340172838076673, %rcx -; imulq %rdi, %rcx, %rdi +; imulq %rcx, %rdi ; shrq $56, %rdi, %rdi ; addq %rdi, %rax ; uninit %rdx @@ -1904,7 +1904,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulq %rax, %rsi, %rax, %rdx +; mulq %rsi ;; implicit: %rax, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1968,7 +1968,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulq %rax, %rsi, %rax, %rdx +; mulq %rsi ;; implicit: %rax, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1997,7 +1997,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulq %rax, %rsi, %rax, %rdx +; imulq %rsi ;; implicit: %rax, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/mul.clif b/cranelift/filetests/filetests/isa/x64/mul.clif index ec771cddbaef..e2354299e907 100644 --- a/cranelift/filetests/filetests/isa/x64/mul.clif +++ b/cranelift/filetests/filetests/isa/x64/mul.clif @@ -13,7 +13,7 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulb %al, %sil, %al +; mulb %sil ;; implicit: %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -40,7 +40,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulw %ax, %si, %ax +; imulw %si, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -51,7 +51,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; imull %esi, %eax +; imulw %si, %ax ; movq %rbp, %rsp ; popq %rbp ; retq @@ -67,7 +67,7 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imull %eax, %esi, %eax +; imull %esi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -94,7 +94,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulq %rax, %rsi, %rax +; imulq %rsi, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -122,8 +122,8 @@ block0(v0: i8, v1: i8, v2: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulb %al, %sil, %al -; mulb %al, %dl, %al +; mulb %sil ;; implicit: %ax +; mulb %dl ;; implicit: %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -151,9 +151,9 @@ block0(v0: i32, v1: i32, v2: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imull %edi, %esi, %edi +; imull %esi, %edi ; movq %rdi, %rax -; imull %eax, %edx, %eax +; imull %edx, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -182,7 +182,7 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imull %eax, 0(%rsi), %eax +; imull (%rsi), %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -210,7 +210,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulq %rax, 0(%rsi), %rax +; imulq (%rsi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -237,7 +237,7 @@ block0(v0: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulb %al, const(0), %al +; mulb (%rip) ;; implicit: %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -248,12 +248,15 @@ block0(v0: i8): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; mulb 0xb(%rip) +; mulb 0xa(%rip) ; movq %rbp, %rsp ; popq %rbp ; retq ; addb %al, (%rax) ; addb %al, (%rax) +; addb %ah, (%rcx) +; addb %al, (%rax) +; addb %al, (%rax) ; addb %al, (%rax) function %imul_i16_const(i16) -> i16{ @@ -266,7 +269,7 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imulw %di, 0x61, %ax +; imulw $0x61, %di, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -291,7 +294,7 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imull %edi, 0x61, %eax +; imull $0x61, %edi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -316,7 +319,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imulq %rdi, 0x61, %rax +; imulq $0x61, %rdi, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -331,7 +334,6 @@ block0(v0: i64): ; popq %rbp ; retq - function %imul_i16_bigger_const(i16) -> i16{ block0(v0: i16): v3 = imul_imm v0, 1021 @@ -342,7 +344,7 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imulw %di, 0x3fd, %ax +; imulw $0x3fd, %di, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -367,7 +369,7 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imull %edi, 0x3fd, %eax +; imull $0x3fd, %edi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -392,7 +394,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imulq %rdi, 0x3fd, %rax +; imulq $0x3fd, %rdi, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -419,7 +421,7 @@ block0(v0: i64): ; movq %rsp, %rbp ; block0: ; movzwq 0(%rdi), %rcx -; imulw %cx, 0x3fd, %ax +; imulw $0x3fd, %cx, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -446,7 +448,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imull 0(%rdi), 0x3fd, %eax +; imull $0x3fd, (%rdi), %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -472,7 +474,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; imulq 100(%rdi), 0x3fd, %rax +; imulq $0x3fd, 0x64(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -487,7 +489,6 @@ block0(v0: i64): ; popq %rbp ; retq - function %widening_smul_from_8bit(i8, i8) -> i16 { block0(v0: i8, v1: i8): v2 = sextend.i16 v0 @@ -501,7 +502,7 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulb %al, %sil, %al +; imulb %sil ;; implicit: %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -530,7 +531,7 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulb %al, %sil, %al +; mulb %sil ;; implicit: %ax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index ea60ebd9c923..91d15fb77ee2 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -28,7 +28,7 @@ block0(v0: i64): ; movabsq $1085102592571150095, %r11 ; andq %r11, %rax ; movabsq $72340172838076673, %rcx -; imulq %rax, %rcx, %rax +; imulq %rcx, %rax ; shrq $56, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -91,7 +91,7 @@ block0(v0: i64): ; movabsq $1085102592571150095, %rsi ; andq %rsi, %rax ; movabsq $72340172838076673, %rdx -; imulq %rax, %rdx, %rax +; imulq %rdx, %rax ; shrq $56, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -151,7 +151,7 @@ block0(v0: i32): ; shrl $4, %r9d, %r9d ; addl %edi, %r9d ; andl $0xf0f0f0f, %r9d -; imull %r9d, 0x1010101, %eax +; imull $0x1010101, %r9d, %eax ; shrl $24, %eax, %eax ; movq %rbp, %rsp ; popq %rbp @@ -210,7 +210,7 @@ block0(v0: i64): ; shrl $4, %r10d, %r10d ; addl %eax, %r10d ; andl $0xf0f0f0f, %r10d -; imull %r10d, 0x1010101, %eax +; imull $0x1010101, %r10d, %eax ; shrl $24, %eax, %eax ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif b/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif index 62f492600e24..cc6aea11e202 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif @@ -17,7 +17,7 @@ block0(v0: i8x16, v1: i8x16): ; pmovsxbw %xmm0, %xmm0 ; palignr $8, %xmm1, %xmm1, %xmm1 ; pmovsxbw %xmm1, %xmm1 -; pmullw %xmm0, %xmm1, %xmm0 +; pmullw %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -49,8 +49,8 @@ block0(v0: i16x8, v1: i16x8): ; movq %rsp, %rbp ; block0: ; movdqa %xmm0, %xmm5 -; pmullw %xmm5, %xmm1, %xmm5 -; pmulhw %xmm0, %xmm1, %xmm0 +; pmullw %xmm1, %xmm5 +; pmulhw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 ; punpckhwd %xmm0, %xmm2, %xmm0 @@ -87,7 +87,7 @@ block0(v0: i32x4, v1: i32x4): ; block0: ; pshufd $250, %xmm0, %xmm0 ; pshufd $250, %xmm1, %xmm5 -; pmuldq %xmm0, %xmm5, %xmm0 +; pmuldq %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -118,7 +118,7 @@ block0(v0: i8x16, v1: i8x16): ; block0: ; pmovsxbw %xmm0, %xmm0 ; pmovsxbw %xmm1, %xmm5 -; pmullw %xmm0, %xmm5, %xmm0 +; pmullw %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -148,8 +148,8 @@ block0(v0: i16x8, v1: i16x8): ; movq %rsp, %rbp ; block0: ; movdqa %xmm0, %xmm5 -; pmullw %xmm5, %xmm1, %xmm5 -; pmulhw %xmm0, %xmm1, %xmm0 +; pmullw %xmm1, %xmm5 +; pmulhw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 ; punpcklwd %xmm0, %xmm2, %xmm0 @@ -186,7 +186,7 @@ block0(v0: i32x4, v1: i32x4): ; block0: ; pshufd $80, %xmm0, %xmm0 ; pshufd $80, %xmm1, %xmm5 -; pmuldq %xmm0, %xmm5, %xmm0 +; pmuldq %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -221,7 +221,7 @@ block0(v0: i8x16, v1: i8x16): ; uninit %xmm2 ; pxor %xmm2, %xmm2 ; punpckhbw %xmm1, %xmm2, %xmm1 -; pmullw %xmm0, %xmm1, %xmm0 +; pmullw %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -253,8 +253,8 @@ block0(v0: i16x8, v1: i16x8): ; movq %rsp, %rbp ; block0: ; movdqa %xmm0, %xmm5 -; pmullw %xmm5, %xmm1, %xmm5 -; pmulhuw %xmm0, %xmm1, %xmm0 +; pmullw %xmm1, %xmm5 +; pmulhuw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 ; punpckhwd %xmm0, %xmm2, %xmm0 @@ -291,7 +291,7 @@ block0(v0: i32x4, v1: i32x4): ; block0: ; pshufd $250, %xmm0, %xmm0 ; pshufd $250, %xmm1, %xmm5 -; pmuludq %xmm0, %xmm5, %xmm0 +; pmuludq %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -322,7 +322,7 @@ block0(v0: i8x16, v1: i8x16): ; block0: ; pmovzxbw %xmm0, %xmm0 ; pmovzxbw %xmm1, %xmm5 -; pmullw %xmm0, %xmm5, %xmm0 +; pmullw %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -352,8 +352,8 @@ block0(v0: i16x8, v1: i16x8): ; movq %rsp, %rbp ; block0: ; movdqa %xmm0, %xmm5 -; pmullw %xmm5, %xmm1, %xmm5 -; pmulhuw %xmm0, %xmm1, %xmm0 +; pmullw %xmm1, %xmm5 +; pmulhuw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 ; punpcklwd %xmm0, %xmm2, %xmm0 @@ -390,7 +390,7 @@ block0(v0: i32x4, v1: i32x4): ; block0: ; pshufd $80, %xmm0, %xmm0 ; pshufd $80, %xmm1, %xmm5 -; pmuludq %xmm0, %xmm5, %xmm0 +; pmuludq %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/smulhi.clif b/cranelift/filetests/filetests/isa/x64/smulhi.clif index 92589c2f0cfa..926320ece5ed 100644 --- a/cranelift/filetests/filetests/isa/x64/smulhi.clif +++ b/cranelift/filetests/filetests/isa/x64/smulhi.clif @@ -12,7 +12,7 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulb %al, %sil, %al +; imulb %sil ;; implicit: %ax ; sarw $8, %ax, %ax ; movq %rbp, %rsp ; popq %rbp @@ -41,7 +41,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulw %ax, %si, %ax, %dx +; imulw %si ;; implicit: %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -70,7 +70,7 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imull %eax, %esi, %eax, %edx +; imull %esi ;; implicit: %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -99,7 +99,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; imulq %rax, %rsi, %rax, %rdx +; imulq %rsi ;; implicit: %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif b/cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif index e82d95a1a12f..4faa482dee52 100644 --- a/cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif +++ b/cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif @@ -11,7 +11,7 @@ block0(v0: i16x8, v1: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; pmulhrsw %xmm0, %xmm1, %xmm0 +; pmulhrsw %xmm1, %xmm0 ; movdqa %xmm0, %xmm5 ; pcmpeqw %xmm5, const(0), %xmm5 ; pxor %xmm5, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/umulhi.clif b/cranelift/filetests/filetests/isa/x64/umulhi.clif index e68df725c1b8..9d62cd532119 100644 --- a/cranelift/filetests/filetests/isa/x64/umulhi.clif +++ b/cranelift/filetests/filetests/isa/x64/umulhi.clif @@ -12,7 +12,7 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulb %al, %sil, %al +; mulb %sil ;; implicit: %ax ; shrw $8, %ax, %ax ; movq %rbp, %rsp ; popq %rbp @@ -41,7 +41,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulw %ax, %si, %ax, %dx +; mulw %si ;; implicit: %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -70,7 +70,7 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mull %eax, %esi, %eax, %edx +; mull %esi ;; implicit: %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -99,7 +99,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; mulq %rax, %rsi, %rax, %rdx +; mulq %rsi ;; implicit: %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp diff --git a/tests/disas/winch/x64/call_indirect/call_indirect.wat b/tests/disas/winch/x64/call_indirect/call_indirect.wat index 7ae7217d5c3b..eaff66c2b1fd 100644 --- a/tests/disas/winch/x64/call_indirect/call_indirect.wat +++ b/tests/disas/winch/x64/call_indirect/call_indirect.wat @@ -37,7 +37,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x30, %r11 ;; cmpq %rsp, %r11 -;; ja 0x227 +;; ja 0x22d ;; 1c: movq %rdi, %r14 ;; subq $0x20, %rsp ;; movq %rdi, 0x18(%rsp) @@ -50,7 +50,7 @@ ;; testl %eax, %eax ;; je 0x56 ;; 4c: movl $1, %eax -;; jmp 0x21e +;; jmp 0x224 ;; 56: movl 0xc(%rsp), %eax ;; subl $2, %eax ;; subq $4, %rsp @@ -59,7 +59,7 @@ ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x229 +;; jae 0x22f ;; 7f: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx @@ -69,27 +69,27 @@ ;; cmovaeq %rsi, %rdx ;; movq (%rdx), %rax ;; testq %rax, %rax -;; jne 0xdc -;; a2: subq $4, %rsp +;; jne 0xdf +;; a5: subq $4, %rsp ;; movl %ecx, (%rsp) ;; subq $8, %rsp ;; movq %r14, %rdi ;; movl $0, %esi ;; movl 8(%rsp), %edx -;; callq 0x32e +;; callq 0x334 ;; addq $8, %rsp ;; addq $4, %rsp ;; movq 0x1c(%rsp), %r14 -;; jmp 0xe3 -;; dc: andq $0xfffffffffffffffe, %rax +;; jmp 0xe6 +;; df: andq $0xfffffffffffffffe, %rax ;; testq %rax, %rax -;; je 0x22b -;; ec: movq 0x30(%r14), %r11 +;; je 0x231 +;; ef: movq 0x30(%r14), %r11 ;; movl (%r11), %ecx ;; movl 0x10(%rax), %edx ;; cmpl %edx, %ecx -;; jne 0x22d -;; fe: pushq %rax +;; jne 0x233 +;; 101: pushq %rax ;; popq %rcx ;; movq 0x18(%rcx), %r8 ;; movq 8(%rcx), %rbx @@ -111,8 +111,8 @@ ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x22f -;; 161: movq %rcx, %r11 +;; jae 0x235 +;; 164: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx ;; movq %rdx, %rsi @@ -121,27 +121,27 @@ ;; cmovaeq %rsi, %rdx ;; movq (%rdx), %rax ;; testq %rax, %rax -;; jne 0x1be -;; 184: subq $4, %rsp +;; jne 0x1c4 +;; 18a: subq $4, %rsp ;; movl %ecx, (%rsp) ;; subq $4, %rsp ;; movq %r14, %rdi ;; movl $0, %esi ;; movl 4(%rsp), %edx -;; callq 0x32e +;; callq 0x334 ;; addq $4, %rsp ;; addq $4, %rsp ;; movq 0x20(%rsp), %r14 -;; jmp 0x1c5 -;; 1be: andq $0xfffffffffffffffe, %rax +;; jmp 0x1cb +;; 1c4: andq $0xfffffffffffffffe, %rax ;; testq %rax, %rax -;; je 0x231 -;; 1ce: movq 0x30(%r14), %r11 +;; je 0x237 +;; 1d4: movq 0x30(%r14), %r11 ;; movl (%r11), %ecx ;; movl 0x10(%rax), %edx ;; cmpl %edx, %ecx -;; jne 0x233 -;; 1e0: pushq %rax +;; jne 0x239 +;; 1e6: pushq %rax ;; popq %rcx ;; movq 0x18(%rcx), %r8 ;; movq 8(%rcx), %rbx @@ -160,10 +160,10 @@ ;; addq $0x20, %rsp ;; popq %rbp ;; retq -;; 227: ud2 -;; 229: ud2 -;; 22b: ud2 ;; 22d: ud2 ;; 22f: ud2 ;; 231: ud2 ;; 233: ud2 +;; 235: ud2 +;; 237: ud2 +;; 239: ud2 diff --git a/tests/disas/winch/x64/call_indirect/local_arg.wat b/tests/disas/winch/x64/call_indirect/local_arg.wat index a8ec21d6863d..e4c69d6f8f33 100644 --- a/tests/disas/winch/x64/call_indirect/local_arg.wat +++ b/tests/disas/winch/x64/call_indirect/local_arg.wat @@ -42,7 +42,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x30, %r11 ;; cmpq %rsp, %r11 -;; ja 0x154 +;; ja 0x157 ;; 5c: movq %rdi, %r14 ;; subq $0x20, %rsp ;; movq %rdi, 0x18(%rsp) @@ -55,7 +55,7 @@ ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x156 +;; jae 0x159 ;; 9e: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx @@ -65,27 +65,27 @@ ;; cmovaeq %rsi, %rdx ;; movq (%rdx), %rax ;; testq %rax, %rax -;; jne 0xfb -;; c1: subq $4, %rsp +;; jne 0xfe +;; c4: subq $4, %rsp ;; movl %ecx, (%rsp) ;; subq $8, %rsp ;; movq %r14, %rdi ;; movl $0, %esi ;; movl 8(%rsp), %edx -;; callq 0x312 +;; callq 0x315 ;; addq $8, %rsp ;; addq $4, %rsp ;; movq 0x1c(%rsp), %r14 -;; jmp 0x102 -;; fb: andq $0xfffffffffffffffe, %rax +;; jmp 0x105 +;; fe: andq $0xfffffffffffffffe, %rax ;; testq %rax, %rax -;; je 0x158 -;; 10b: movq 0x30(%r14), %r11 +;; je 0x15b +;; 10e: movq 0x30(%r14), %r11 ;; movl (%r11), %ecx ;; movl 0x10(%rax), %edx ;; cmpl %edx, %ecx -;; jne 0x15a -;; 11d: movq 0x18(%rax), %rbx +;; jne 0x15d +;; 120: movq 0x18(%rax), %rbx ;; movq 8(%rax), %rcx ;; subq $0xc, %rsp ;; movq %rbx, %rdi @@ -98,7 +98,7 @@ ;; addq $0x20, %rsp ;; popq %rbp ;; retq -;; 154: ud2 -;; 156: ud2 -;; 158: ud2 -;; 15a: ud2 +;; 157: ud2 +;; 159: ud2 +;; 15b: ud2 +;; 15d: ud2 diff --git a/tests/disas/winch/x64/i32_mul/const.wat b/tests/disas/winch/x64/i32_mul/const.wat index 82302285ddb9..76133ce4dc4c 100644 --- a/tests/disas/winch/x64/i32_mul/const.wat +++ b/tests/disas/winch/x64/i32_mul/const.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x40 +;; ja 0x43 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 40: ud2 +;; 43: ud2 diff --git a/tests/disas/winch/x64/i32_mul/max.wat b/tests/disas/winch/x64/i32_mul/max.wat index e888b8afcb3e..6696b007fe7f 100644 --- a/tests/disas/winch/x64/i32_mul/max.wat +++ b/tests/disas/winch/x64/i32_mul/max.wat @@ -14,14 +14,14 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x40 +;; ja 0x43 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) ;; movq %rsi, (%rsp) ;; movl $0x7fffffff, %eax -;; imull $-1, %eax, %eax +;; imull $0xffffffff, %eax, %eax ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 40: ud2 +;; 43: ud2 diff --git a/tests/disas/winch/x64/i32_mul/max_one.wat b/tests/disas/winch/x64/i32_mul/max_one.wat index 3c9731f5a67d..725f0ccc9514 100644 --- a/tests/disas/winch/x64/i32_mul/max_one.wat +++ b/tests/disas/winch/x64/i32_mul/max_one.wat @@ -15,14 +15,14 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x40 +;; ja 0x43 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) ;; movq %rsi, (%rsp) ;; movl $0x80000000, %eax -;; imull $-1, %eax, %eax +;; imull $0xffffffff, %eax, %eax ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 40: ud2 +;; 43: ud2 diff --git a/tests/disas/winch/x64/i32_mul/mixed.wat b/tests/disas/winch/x64/i32_mul/mixed.wat index 8a5fa104ed8f..ff0cdf751ace 100644 --- a/tests/disas/winch/x64/i32_mul/mixed.wat +++ b/tests/disas/winch/x64/i32_mul/mixed.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x40 +;; ja 0x43 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 40: ud2 +;; 43: ud2 diff --git a/tests/disas/winch/x64/i32_mul/signed.wat b/tests/disas/winch/x64/i32_mul/signed.wat index aee427ac97e0..25ac41c5438c 100644 --- a/tests/disas/winch/x64/i32_mul/signed.wat +++ b/tests/disas/winch/x64/i32_mul/signed.wat @@ -15,14 +15,14 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x40 +;; ja 0x43 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) ;; movq %rsi, (%rsp) ;; movl $0xffffffff, %eax -;; imull $-1, %eax, %eax +;; imull $0xffffffff, %eax, %eax ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 40: ud2 +;; 43: ud2 diff --git a/tests/disas/winch/x64/i32_mul/unsigned_with_zero.wat b/tests/disas/winch/x64/i32_mul/unsigned_with_zero.wat index 8f455eb280f9..c1bdcedaea60 100644 --- a/tests/disas/winch/x64/i32_mul/unsigned_with_zero.wat +++ b/tests/disas/winch/x64/i32_mul/unsigned_with_zero.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x40 +;; ja 0x43 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 40: ud2 +;; 43: ud2 diff --git a/tests/disas/winch/x64/i64_mul/const.wat b/tests/disas/winch/x64/i64_mul/const.wat index febd091b8204..50afc1ecc1d8 100644 --- a/tests/disas/winch/x64/i64_mul/const.wat +++ b/tests/disas/winch/x64/i64_mul/const.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x43 +;; ja 0x46 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 43: ud2 +;; 46: ud2 diff --git a/tests/disas/winch/x64/i64_mul/max.wat b/tests/disas/winch/x64/i64_mul/max.wat index b0c0c42ea731..a24c3956e8e2 100644 --- a/tests/disas/winch/x64/i64_mul/max.wat +++ b/tests/disas/winch/x64/i64_mul/max.wat @@ -14,7 +14,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x46 +;; ja 0x49 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -24,4 +24,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 46: ud2 +;; 49: ud2 diff --git a/tests/disas/winch/x64/i64_mul/max_one.wat b/tests/disas/winch/x64/i64_mul/max_one.wat index 8e2c3bee4b97..165f02bbdf1b 100644 --- a/tests/disas/winch/x64/i64_mul/max_one.wat +++ b/tests/disas/winch/x64/i64_mul/max_one.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x46 +;; ja 0x49 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 46: ud2 +;; 49: ud2 diff --git a/tests/disas/winch/x64/i64_mul/mixed.wat b/tests/disas/winch/x64/i64_mul/mixed.wat index 3230b4eb47ee..eeecef325c78 100644 --- a/tests/disas/winch/x64/i64_mul/mixed.wat +++ b/tests/disas/winch/x64/i64_mul/mixed.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x43 +;; ja 0x46 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 43: ud2 +;; 46: ud2 diff --git a/tests/disas/winch/x64/i64_mul/signed.wat b/tests/disas/winch/x64/i64_mul/signed.wat index 610fade1ee5c..15348f1440bb 100644 --- a/tests/disas/winch/x64/i64_mul/signed.wat +++ b/tests/disas/winch/x64/i64_mul/signed.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x43 +;; ja 0x46 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 43: ud2 +;; 46: ud2 diff --git a/tests/disas/winch/x64/i64_mul/unsigned_with_zero.wat b/tests/disas/winch/x64/i64_mul/unsigned_with_zero.wat index bbb2378263c3..3d27c6b9263c 100644 --- a/tests/disas/winch/x64/i64_mul/unsigned_with_zero.wat +++ b/tests/disas/winch/x64/i64_mul/unsigned_with_zero.wat @@ -15,7 +15,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0x43 +;; ja 0x46 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -25,4 +25,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 43: ud2 +;; 46: ud2 diff --git a/tests/disas/winch/x64/loop/as_binary_operand.wat b/tests/disas/winch/x64/loop/as_binary_operand.wat index 8b0fd207f201..e7f2b6c011d1 100644 --- a/tests/disas/winch/x64/loop/as_binary_operand.wat +++ b/tests/disas/winch/x64/loop/as_binary_operand.wat @@ -33,7 +33,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x10, %r11 ;; cmpq %rsp, %r11 -;; ja 0xa0 +;; ja 0xa3 ;; 5c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -51,4 +51,4 @@ ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; a0: ud2 +;; a3: ud2 diff --git a/tests/disas/winch/x64/loop/effects.wat b/tests/disas/winch/x64/loop/effects.wat index fb34e8d14b75..38f85c5960e4 100644 --- a/tests/disas/winch/x64/loop/effects.wat +++ b/tests/disas/winch/x64/loop/effects.wat @@ -23,7 +23,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x20, %r11 ;; cmpq %rsp, %r11 -;; ja 0x7f +;; ja 0x85 ;; 1c: movq %rdi, %r14 ;; subq $0x20, %rsp ;; movq %rdi, 0x18(%rsp) @@ -47,4 +47,4 @@ ;; addq $0x20, %rsp ;; popq %rbp ;; retq -;; 7f: ud2 +;; 85: ud2 diff --git a/tests/disas/winch/x64/table/fill.wat b/tests/disas/winch/x64/table/fill.wat index 6b6ac36a698d..4ffee7d4733e 100644 --- a/tests/disas/winch/x64/table/fill.wat +++ b/tests/disas/winch/x64/table/fill.wat @@ -78,7 +78,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x40, %r11 ;; cmpq %rsp, %r11 -;; ja 0x1f7 +;; ja 0x1fa ;; dc: movq %rdi, %r14 ;; subq $0x30, %rsp ;; movq %rdi, 0x28(%rsp) @@ -96,7 +96,7 @@ ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x1f9 +;; jae 0x1fc ;; 138: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx @@ -106,19 +106,19 @@ ;; cmovaeq %rsi, %rdx ;; movq (%rdx), %rax ;; testq %rax, %rax -;; jne 0x195 -;; 15b: subq $4, %rsp +;; jne 0x198 +;; 15e: subq $4, %rsp ;; movl %ecx, (%rsp) ;; subq $0xc, %rsp ;; movq %r14, %rdi ;; movl $0, %esi ;; movl 0xc(%rsp), %edx -;; callq 0x495 +;; callq 0x498 ;; addq $0xc, %rsp ;; addq $4, %rsp ;; movq 0x28(%rsp), %r14 -;; jmp 0x19c -;; 195: andq $0xfffffffffffffffe, %rax +;; jmp 0x19f +;; 198: andq $0xfffffffffffffffe, %rax ;; movq %rax, 0xc(%rsp) ;; movl 0x1c(%rsp), %r11d ;; subq $4, %rsp @@ -133,11 +133,11 @@ ;; movl 0xc(%rsp), %edx ;; movq 4(%rsp), %rcx ;; movl (%rsp), %r8d -;; callq 0x4d6 +;; callq 0x4d9 ;; addq $0x10, %rsp ;; movq 0x28(%rsp), %r14 ;; addq $0x30, %rsp ;; popq %rbp ;; retq -;; 1f7: ud2 -;; 1f9: ud2 +;; 1fa: ud2 +;; 1fc: ud2 diff --git a/tests/disas/winch/x64/table/get.wat b/tests/disas/winch/x64/table/get.wat index da146be17436..ebd3f3453f8e 100644 --- a/tests/disas/winch/x64/table/get.wat +++ b/tests/disas/winch/x64/table/get.wat @@ -34,7 +34,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x30, %r11 ;; cmpq %rsp, %r11 -;; ja 0x10b +;; ja 0x10e ;; 5c: movq %rdi, %r14 ;; subq $0x20, %rsp ;; movq %rdi, 0x18(%rsp) @@ -48,7 +48,7 @@ ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x10d +;; jae 0x110 ;; 9e: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx @@ -58,21 +58,21 @@ ;; cmovaeq %rsi, %rdx ;; movq (%rdx), %rax ;; testq %rax, %rax -;; jne 0xfb -;; c1: subq $4, %rsp +;; jne 0xfe +;; c4: subq $4, %rsp ;; movl %ecx, (%rsp) ;; subq $0xc, %rsp ;; movq %r14, %rdi ;; movl $0, %esi ;; movl 0xc(%rsp), %edx -;; callq 0x2d8 +;; callq 0x2db ;; addq $0xc, %rsp ;; addq $4, %rsp ;; movq 0x18(%rsp), %r14 -;; jmp 0x102 -;; fb: andq $0xfffffffffffffffe, %rax +;; jmp 0x105 +;; fe: andq $0xfffffffffffffffe, %rax ;; addq $0x20, %rsp ;; popq %rbp ;; retq -;; 10b: ud2 -;; 10d: ud2 +;; 10e: ud2 +;; 110: ud2 diff --git a/tests/disas/winch/x64/table/init_copy_drop.wat b/tests/disas/winch/x64/table/init_copy_drop.wat index 6e033b7153d1..c14b230b382f 100644 --- a/tests/disas/winch/x64/table/init_copy_drop.wat +++ b/tests/disas/winch/x64/table/init_copy_drop.wat @@ -142,11 +142,11 @@ ;; movl $7, %ecx ;; movl $0, %r8d ;; movl $4, %r9d -;; callq 0x8aa +;; callq 0x8ad ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $1, %esi -;; callq 0x909 +;; callq 0x90c ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $0, %esi @@ -154,11 +154,11 @@ ;; movl $0xf, %ecx ;; movl $1, %r8d ;; movl $3, %r9d -;; callq 0x8aa +;; callq 0x8ad ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $3, %esi -;; callq 0x909 +;; callq 0x90c ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $0, %esi @@ -166,7 +166,7 @@ ;; movl $0x14, %ecx ;; movl $0xf, %r8d ;; movl $5, %r9d -;; callq 0x948 +;; callq 0x94b ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $0, %esi @@ -174,7 +174,7 @@ ;; movl $0x15, %ecx ;; movl $0x1d, %r8d ;; movl $1, %r9d -;; callq 0x948 +;; callq 0x94b ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $0, %esi @@ -182,7 +182,7 @@ ;; movl $0x18, %ecx ;; movl $0xa, %r8d ;; movl $1, %r9d -;; callq 0x948 +;; callq 0x94b ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $0, %esi @@ -190,7 +190,7 @@ ;; movl $0xd, %ecx ;; movl $0xb, %r8d ;; movl $4, %r9d -;; callq 0x948 +;; callq 0x94b ;; movq 8(%rsp), %r14 ;; movq %r14, %rdi ;; movl $0, %esi @@ -198,7 +198,7 @@ ;; movl $0x13, %ecx ;; movl $0x14, %r8d ;; movl $5, %r9d -;; callq 0x948 +;; callq 0x94b ;; movq 8(%rsp), %r14 ;; addq $0x10, %rsp ;; popq %rbp @@ -212,7 +212,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x30, %r11 ;; cmpq %rsp, %r11 -;; ja 0x3c3 +;; ja 0x3c6 ;; 2dc: movq %rdi, %r14 ;; subq $0x20, %rsp ;; movq %rdi, 0x18(%rsp) @@ -226,7 +226,7 @@ ;; movq %r14, %rdx ;; movq 0xb8(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x3c5 +;; jae 0x3c8 ;; 321: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0xb0(%rdx), %rdx @@ -236,27 +236,27 @@ ;; cmovaeq %rsi, %rdx ;; movq (%rdx), %rax ;; testq %rax, %rax -;; jne 0x381 -;; 347: subq $4, %rsp +;; jne 0x384 +;; 34a: subq $4, %rsp ;; movl %ecx, (%rsp) ;; subq $0xc, %rsp ;; movq %r14, %rdi ;; movl $0, %esi ;; movl 0xc(%rsp), %edx -;; callq 0x9a7 +;; callq 0x9aa ;; addq $0xc, %rsp ;; addq $4, %rsp ;; movq 0x18(%rsp), %r14 -;; jmp 0x388 -;; 381: andq $0xfffffffffffffffe, %rax +;; jmp 0x38b +;; 384: andq $0xfffffffffffffffe, %rax ;; testq %rax, %rax -;; je 0x3c7 -;; 391: movq 0x30(%r14), %r11 +;; je 0x3ca +;; 394: movq 0x30(%r14), %r11 ;; movl (%r11), %ecx ;; movl 0x10(%rax), %edx ;; cmpl %edx, %ecx -;; jne 0x3c9 -;; 3a3: pushq %rax +;; jne 0x3cc +;; 3a6: pushq %rax ;; popq %rcx ;; movq 0x18(%rcx), %rbx ;; movq 8(%rcx), %rdx @@ -267,7 +267,7 @@ ;; addq $0x20, %rsp ;; popq %rbp ;; retq -;; 3c3: ud2 -;; 3c5: ud2 -;; 3c7: ud2 -;; 3c9: ud2 +;; 3c6: ud2 +;; 3c8: ud2 +;; 3ca: ud2 +;; 3cc: ud2 diff --git a/tests/disas/winch/x64/table/set.wat b/tests/disas/winch/x64/table/set.wat index 17de4c09e447..dbea2652f86a 100644 --- a/tests/disas/winch/x64/table/set.wat +++ b/tests/disas/winch/x64/table/set.wat @@ -39,7 +39,7 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x20, %r11 ;; cmpq %rsp, %r11 -;; ja 0xba +;; ja 0xbd ;; 5c: movq %rdi, %r14 ;; subq $0x20, %rsp ;; movq %rdi, 0x18(%rsp) @@ -51,7 +51,7 @@ ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0xbc +;; jae 0xbf ;; 90: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx @@ -64,8 +64,8 @@ ;; addq $0x20, %rsp ;; popq %rbp ;; retq -;; ba: ud2 -;; bc: ud2 +;; bd: ud2 +;; bf: ud2 ;; ;; wasm[0]::function[2]: ;; pushq %rbp @@ -74,8 +74,8 @@ ;; movq 0x10(%r11), %r11 ;; addq $0x30, %r11 ;; cmpq %rsp, %r11 -;; ja 0x1da -;; dc: movq %rdi, %r14 +;; ja 0x1f0 +;; ec: movq %rdi, %r14 ;; subq $0x20, %rsp ;; movq %rdi, 0x18(%rsp) ;; movq %rsi, 0x10(%rsp) @@ -92,8 +92,8 @@ ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x1dc -;; 132: movq %rcx, %r11 +;; jae 0x1f2 +;; 142: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx ;; movq %rdx, %rsi @@ -102,26 +102,26 @@ ;; cmovaeq %rsi, %rdx ;; movq (%rdx), %rax ;; testq %rax, %rax -;; jne 0x18f -;; 155: subq $4, %rsp +;; jne 0x1a2 +;; 168: subq $4, %rsp ;; movl %ecx, (%rsp) ;; subq $8, %rsp ;; movq %r14, %rdi ;; movl $0, %esi ;; movl 8(%rsp), %edx -;; callq 0x498 +;; callq 0x4ae ;; addq $8, %rsp ;; addq $4, %rsp ;; movq 0x1c(%rsp), %r14 -;; jmp 0x196 -;; 18f: andq $0xfffffffffffffffe, %rax +;; jmp 0x1a9 +;; 1a2: andq $0xfffffffffffffffe, %rax ;; movl (%rsp), %ecx ;; addq $4, %rsp ;; movq %r14, %rdx ;; movq 0x40(%rdx), %rbx ;; cmpq %rbx, %rcx -;; jae 0x1de -;; 1b0: movq %rcx, %r11 +;; jae 0x1f4 +;; 1c3: movq %rcx, %r11 ;; imulq $8, %r11, %r11 ;; movq 0x38(%rdx), %rdx ;; movq %rdx, %rsi @@ -133,6 +133,6 @@ ;; addq $0x20, %rsp ;; popq %rbp ;; retq -;; 1da: ud2 -;; 1dc: ud2 -;; 1de: ud2 +;; 1f0: ud2 +;; 1f2: ud2 +;; 1f4: ud2 diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index aeffbd429bc1..59a6d4157c31 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -1205,22 +1205,29 @@ impl Assembler { /// Multiply immediate and register. pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) { - self.emit(Inst::IMulImm { - size: size.into(), - src1: dst.to_reg().into(), - src2: imm, - dst: dst.map(Into::into), - }); + use OperandSize::*; + let src = dst.to_reg(); + let dst: WritableGpr = dst.to_reg().into(); + let inst = match size { + S16 => asm::inst::imulw_rmi::new(dst, src, u16::try_from(imm).unwrap()).into(), + S32 => asm::inst::imull_rmi::new(dst, src, imm as u32).into(), + S64 => asm::inst::imulq_rmi_sxl::new(dst, src, imm).into(), + S8 | S128 => unimplemented!(), + }; + self.emit(Inst::External { inst }); } /// Multiply register and register. pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) { - self.emit(Inst::IMul { - size: size.into(), - src1: dst.to_reg().into(), - src2: src.into(), - dst: dst.map(Into::into), - }); + use OperandSize::*; + let dst = pair_gpr(dst); + let inst = match size { + S16 => asm::inst::imulw_rm::new(dst, src).into(), + S32 => asm::inst::imull_rm::new(dst, src).into(), + S64 => asm::inst::imulq_rm::new(dst, src).into(), + S8 | S128 => unimplemented!(), + }; + self.emit(Inst::External { inst }); } /// Add immediate and register. @@ -1734,14 +1741,30 @@ impl Assembler { kind: MulWideKind, size: OperandSize, ) { - self.emit(Inst::Mul { - signed: kind == MulWideKind::Signed, - size: size.into(), - src1: lhs.into(), - src2: rhs.into(), - dst_lo: dst_lo.to_reg().into(), - dst_hi: dst_hi.to_reg().into(), - }); + use MulWideKind::*; + use OperandSize::*; + let rax = asm::Fixed(PairedGpr { + read: lhs.into(), + write: WritableGpr::from_reg(dst_lo.to_reg().into()), + }); + let rdx = asm::Fixed(dst_hi.to_reg().into()); + if size == S8 { + // For `mulb` and `imulb`, both the high and low bits are written to + // RAX. + assert_eq!(dst_lo, dst_hi); + } + let inst = match (size, kind) { + (S8, Unsigned) => asm::inst::mulb_m::new(rax, rhs).into(), + (S8, Signed) => asm::inst::imulb_m::new(rax, rhs).into(), + (S16, Unsigned) => asm::inst::mulw_m::new(rax, rdx, rhs).into(), + (S16, Signed) => asm::inst::imulw_m::new(rax, rdx, rhs).into(), + (S32, Unsigned) => asm::inst::mull_m::new(rax, rdx, rhs).into(), + (S32, Signed) => asm::inst::imull_m::new(rax, rdx, rhs).into(), + (S64, Unsigned) => asm::inst::mulq_m::new(rax, rdx, rhs).into(), + (S64, Signed) => asm::inst::imulq_m::new(rax, rdx, rhs).into(), + (S128, _) => unimplemented!(), + }; + self.emit(Inst::External { inst }); } /// Shuffles bytes in `src` according to contents of `mask` and puts