Skip to content

Commit 63a8ce3

Browse files
authored
x64: Migrate xmm mov-family instructions to new assembler (#10834)
* x64: Migrate xmm mov-family instructions to new assembler

This commit migrates xmm movement-style instructions, such as `mov{s{s,d},{a,u}p{s,d},dq{a,u}}`, to the new assembler. This enables deleting the `XmmMovRM` class of instructions in ISLE.

Along the way a number of notable changes were made:

* ISLE constructors for assembler instructions now take `SyntheticAmode` instead of `Amode` since it's already supported anyway and it's a more flexible argument to take.
* The conversion from `SyntheticAmode` to the assembler `Amode<Gpr>` was fixed where one variant needed to use `rsp` instead of `rbp`.
* The `is_move` method was updated to ignore `movss` and `movsd` instructions since they're not true movement-related instructions in register-to-register situations.

* Use `gen_move` helper in Winch
1 parent 4fe87ae commit 63a8ce3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+760
-809
lines changed

cranelift/assembler-x64/meta/src/generate/format.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ impl dsl::Format {
118118
fmtln!(f, "let digit = 0x{digit:x};");
119119
fmtln!(f, "let rex = self.{dst}.as_rex_prefix(digit, {bits});");
120120
}
121-
[Reg(dst), RegMem(src)] | [Reg(dst), RegMem(src), Imm(_)] => {
121+
[Reg(dst), RegMem(src)] | [Reg(dst), RegMem(src), Imm(_)] | [Reg(dst), Mem(src)] => {
122122
fmtln!(f, "let dst = self.{dst}.enc();");
123123
fmtln!(f, "let rex = self.{src}.as_rex_prefix(dst, {bits});");
124124
}
@@ -219,7 +219,8 @@ impl dsl::Format {
219219
| [Reg(reg), Reg(_), RegMem(mem)]
220220
| [RegMem(mem), Reg(reg)]
221221
| [RegMem(mem), Reg(reg), Imm(_)]
222-
| [RegMem(mem), Reg(reg), FixedReg(_)] => {
222+
| [RegMem(mem), Reg(reg), FixedReg(_)]
223+
| [Reg(reg), Mem(mem)] => {
223224
fmtln!(f, "let reg = self.{reg}.enc();");
224225
fmtln!(
225226
f,

cranelift/assembler-x64/meta/src/instructions.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod div;
88
mod lanes;
99
mod max;
1010
mod min;
11+
mod mov;
1112
mod mul;
1213
mod neg;
1314
mod or;
@@ -30,6 +31,7 @@ pub fn list() -> Vec<Inst> {
3031
all.extend(lanes::list());
3132
all.extend(max::list());
3233
all.extend(min::list());
34+
all.extend(mov::list());
3335
all.extend(mul::list());
3436
all.extend(neg::list());
3537
all.extend(or::list());

cranelift/assembler-x64/meta/src/instructions/lanes.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,6 @@ pub fn list() -> Vec<Inst> {
2222
inst("pinsrd", fmt("A", [rw(xmm1), r(rm32), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x22]).r().ib(), _64b | compat | sse41),
2323
inst("pinsrq", fmt("A", [rw(xmm1), r(rm64), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x22]).r().ib().w(), _64b | sse41),
2424

25-
inst("movd", fmt("A", [w(xmm1), r(rm32)]), rex([0x66, 0x0F, 0x6E]).r(), _64b | compat | sse2),
26-
inst("movq", fmt("A", [w(xmm1), r(rm64)]), rex([0x66, 0x0F, 0x6E]).r().w(), _64b | sse2),
27-
inst("movd", fmt("B", [w(rm32), r(xmm2)]), rex([0x66, 0x0F, 0x7E]).r(), _64b | compat | sse2),
28-
inst("movq", fmt("B", [w(rm64), r(xmm2)]), rex([0x66, 0x0F, 0x7E]).r().w(), _64b | sse2),
29-
3025
inst("movmskps", fmt("RM", [w(r32), r(xmm2)]), rex([0x0F, 0x50]).r(), _64b | compat | sse),
3126
inst("movmskpd", fmt("RM", [w(r32), r(xmm2)]), rex([0x66, 0x0F, 0x50]).r(), _64b | compat | sse2),
3227
inst("pmovmskb", fmt("RM", [w(r32), r(xmm2)]), rex([0x66, 0x0F, 0xD7]).r(), _64b | compat | sse2),
cranelift/assembler-x64/meta/src/instructions/mov.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
use crate::dsl::{Feature::*, Inst, Location::*};
2+
use crate::dsl::{align, fmt, inst, r, rex, rw, w};
3+
4+
#[rustfmt::skip] // Keeps instructions on a single line.
5+
pub fn list() -> Vec<Inst> {
6+
vec![
7+
inst("movd", fmt("A", [w(xmm1), r(rm32)]), rex([0x66, 0x0F, 0x6E]).r(), _64b | compat | sse2),
8+
inst("movq", fmt("A", [w(xmm1), r(rm64)]), rex([0x66, 0x0F, 0x6E]).r().w(), _64b | sse2),
9+
inst("movd", fmt("B", [w(rm32), r(xmm2)]), rex([0x66, 0x0F, 0x7E]).r(), _64b | compat | sse2),
10+
inst("movq", fmt("B", [w(rm64), r(xmm2)]), rex([0x66, 0x0F, 0x7E]).r().w(), _64b | sse2),
11+
12+
// Note that `movss` and `movsd` only have "A" and "C" modes listed
13+
// in the Intel manual but here they're split into "*_M" and "*_R" to
14+
// model the different regalloc behavior each one has. Notably the
15+
// memory-using variant does the usual read or write of the memory
16+
// depending on the instruction, but the "*_R" variant both reads and
17+
// writes the destination register because the upper bits are preserved.
18+
//
19+
// Additionally "C_R" is not specified here since it's not needed over
20+
// the "A_R" variant and it's additionally not encoded correctly as the
21+
// destination must be modeled in the ModRM:r/m byte, not the ModRM:reg
22+
// byte. Currently our encoding based on format doesn't account for this
23+
// special case, so it's just dropped here.
24+
inst("movss", fmt("A_M", [w(xmm1), r(m32)]), rex([0xF3, 0x0F, 0x10]).r(), _64b | sse),
25+
inst("movss", fmt("A_R", [rw(xmm1), r(xmm2)]), rex([0xF3, 0x0F, 0x10]).r(), _64b | sse),
26+
// `movss` stores a single-precision (32-bit) scalar, so the memory destination is `m32`.
inst("movss", fmt("C_M", [w(m32), r(xmm1)]), rex([0xF3, 0x0F, 0x11]).r(), _64b | sse),
27+
// `movsd` loads a double-precision (64-bit) scalar, so the memory source is `m64`.
inst("movsd", fmt("A_M", [w(xmm1), r(m64)]), rex([0xF2, 0x0F, 0x10]).r(), _64b | sse2),
28+
inst("movsd", fmt("A_R", [rw(xmm1), r(xmm2)]), rex([0xF2, 0x0F, 0x10]).r(), _64b | sse2),
29+
inst("movsd", fmt("C_M", [w(m64), r(xmm1)]), rex([0xF2, 0x0F, 0x11]).r(), _64b | sse2),
30+
31+
inst("movapd", fmt("A", [w(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x28]).r(), _64b | sse2),
32+
inst("movapd", fmt("B", [w(align(xmm_m128)), r(xmm1)]), rex([0x66, 0x0F, 0x29]).r(), _64b | sse2),
33+
inst("movaps", fmt("A", [w(xmm1), r(align(xmm_m128))]), rex([0x0F, 0x28]).r(), _64b | sse),
34+
inst("movaps", fmt("B", [w(align(xmm_m128)), r(xmm1)]), rex([0x0F, 0x29]).r(), _64b | sse),
35+
inst("movupd", fmt("A", [w(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x10]).r(), _64b | sse2),
36+
inst("movupd", fmt("B", [w(xmm_m128), r(xmm1)]), rex([0x66, 0x0F, 0x11]).r(), _64b | sse2),
37+
inst("movups", fmt("A", [w(xmm1), r(xmm_m128)]), rex([0x0F, 0x10]).r(), _64b | sse),
38+
inst("movups", fmt("B", [w(xmm_m128), r(xmm1)]), rex([0x0F, 0x11]).r(), _64b | sse),
39+
inst("movdqa", fmt("A", [w(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x6F]).r(), _64b | sse2),
40+
inst("movdqa", fmt("B", [w(align(xmm_m128)), r(xmm1)]), rex([0x66, 0x0F, 0x7F]).r(), _64b | sse2),
41+
inst("movdqu", fmt("A", [w(xmm1), r(xmm_m128)]), rex([0xF3, 0x0F, 0x6F]).r(), _64b | sse2),
42+
inst("movdqu", fmt("B", [w(xmm_m128), r(xmm1)]), rex([0xF3, 0x0F, 0x7F]).r(), _64b | sse2),
43+
]
44+
}

cranelift/assembler-x64/src/api.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,4 +252,12 @@ pub trait RegisterVisitor<R: Registers> {
252252
XmmMem::Mem(m) => self.read_amode(m),
253253
}
254254
}
255+
256+
/// Helper method to handle a write [`XmmMem`] operand.
///
/// A register operand is visited with `write_xmm`. A memory operand has its
/// addressing mode visited with `read_amode`: the memory side of the operand
/// is typed with `R::ReadGpr`, i.e. the registers forming the address are
/// read-typed even though the operand as a whole is a write.
257+
fn write_xmm_mem(&mut self, op: &mut XmmMem<R::WriteXmm, R::ReadGpr>) {
258+
match op {
259+
XmmMem::Xmm(r) => self.write_xmm(r),
260+
XmmMem::Mem(m) => self.read_amode(m),
261+
}
262+
}
255263
}

cranelift/codegen/meta/src/gen_asm.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ pub fn rust_param_raw(op: &Operand) -> String {
2525
format!("&{reg}Mem{aligned}")
2626
}
2727
OperandKind::Mem(_) => {
28-
format!("&Amode")
28+
format!("&SyntheticAmode")
2929
}
3030
OperandKind::Reg(r) | OperandKind::FixedReg(r) => r.reg_class().unwrap().to_string(),
3131
}
@@ -223,9 +223,9 @@ pub fn isle_param_raw(op: &Operand) -> String {
223223
OperandKind::Reg(r) | OperandKind::FixedReg(r) => r.reg_class().unwrap().to_string(),
224224
OperandKind::Mem(_) => {
225225
if op.align {
226-
unimplemented!("no way yet to mark an Amode as aligned")
226+
unimplemented!("no way yet to mark an SyntheticAmode as aligned")
227227
} else {
228-
"Amode".to_string()
228+
"SyntheticAmode".to_string()
229229
}
230230
}
231231
OperandKind::RegMem(rm) => {
@@ -316,7 +316,7 @@ pub fn isle_param_for_ctor(op: &Operand, ctor: IsleConstructor) -> String {
316316
// other constructor it's operating on registers so the argument is
317317
// a `Gpr`.
318318
OperandKind::RegMem(_) if op.mutability.is_write() => match ctor {
319-
IsleConstructor::RetMemorySideEffect => "Amode".to_string(),
319+
IsleConstructor::RetMemorySideEffect => "SyntheticAmode".to_string(),
320320
IsleConstructor::RetGpr => "Gpr".to_string(),
321321
IsleConstructor::RetXmm => "Xmm".to_string(),
322322
IsleConstructor::RetValueRegs => "ValueRegs".to_string(),
@@ -581,5 +581,9 @@ pub fn generate_isle(f: &mut Formatter, insts: &[Inst]) {
581581
/// `RegMem::Mem`, an operand from the constructor of the original entrypoint
582582
/// itself.
583583
fn is_raw_operand_param(o: &Operand) -> bool {
584-
o.mutability.is_read() || matches!(o.location.kind(), OperandKind::RegMem(_))
584+
o.mutability.is_read()
585+
|| matches!(
586+
o.location.kind(),
587+
OperandKind::RegMem(_) | OperandKind::Mem(_)
588+
)
585589
}

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 16 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -330,12 +330,6 @@
330330
(src XmmMem)
331331
(dst WritableXmm))
332332

333-
;; XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd,
334-
;; movq
335-
(XmmMovRM (op SseOpcode)
336-
(src Xmm)
337-
(dst SyntheticAmode))
338-
339333
;; Conversion from signed integers to floats, the `{v,}`cvtsi2s{s,d}`
340334
;; instructions.
341335
;;
@@ -764,15 +758,7 @@
764758
Divss
765759
Divsd
766760
Insertps
767-
Movaps
768-
Movapd
769-
Movdqa
770-
Movdqu
771761
Movlhps
772-
Movss
773-
Movsd
774-
Movups
775-
Movupd
776762
Pabsb
777763
Pabsw
778764
Pabsd
@@ -2285,57 +2271,49 @@
22852271
dst))
22862272

22872273
(decl x64_movss_load (SyntheticAmode) Xmm)
2288-
(rule (x64_movss_load from)
2289-
(xmm_unary_rm_r_unaligned (SseOpcode.Movss) from))
2274+
(rule (x64_movss_load from) (x64_movss_a_m from))
22902275
(rule 1 (x64_movss_load from)
22912276
(if-let true (use_avx))
22922277
(xmm_unary_rm_r_vex (AvxOpcode.Vmovss) from))
22932278

22942279
(decl x64_movss_store (SyntheticAmode Xmm) SideEffectNoResult)
2295-
(rule (x64_movss_store addr data)
2296-
(xmm_movrm (SseOpcode.Movss) addr data))
2280+
(rule (x64_movss_store addr data) (x64_movss_c_m_mem addr data))
22972281
(rule 1 (x64_movss_store addr data)
22982282
(if-let true (use_avx))
22992283
(xmm_movrm_vex (AvxOpcode.Vmovss) addr data))
23002284

23012285
(decl x64_movsd_load (SyntheticAmode) Xmm)
2302-
(rule (x64_movsd_load from)
2303-
(xmm_unary_rm_r_unaligned (SseOpcode.Movsd) from))
2286+
(rule (x64_movsd_load from) (x64_movsd_a_m from))
23042287
(rule 1 (x64_movsd_load from)
23052288
(if-let true (use_avx))
23062289
(xmm_unary_rm_r_vex (AvxOpcode.Vmovsd) from))
23072290

23082291
(decl x64_movsd_store (SyntheticAmode Xmm) SideEffectNoResult)
2309-
(rule (x64_movsd_store addr data)
2310-
(xmm_movrm (SseOpcode.Movsd) addr data))
2292+
(rule (x64_movsd_store addr data) (x64_movsd_c_m_mem addr data))
23112293
(rule 1 (x64_movsd_store addr data)
23122294
(if-let true (use_avx))
23132295
(xmm_movrm_vex (AvxOpcode.Vmovsd) addr data))
23142296

23152297
(decl x64_movups_load (SyntheticAmode) Xmm)
2316-
(rule (x64_movups_load from)
2317-
(xmm_unary_rm_r_unaligned (SseOpcode.Movups) from))
2298+
(rule (x64_movups_load from) (x64_movups_a from))
23182299
(rule 1 (x64_movups_load from)
23192300
(if-let true (use_avx))
23202301
(xmm_unary_rm_r_vex (AvxOpcode.Vmovups) from))
23212302

23222303
(decl x64_movups_store (SyntheticAmode Xmm) SideEffectNoResult)
2323-
(rule (x64_movups_store addr data)
2324-
(xmm_movrm (SseOpcode.Movups) addr data))
2304+
(rule (x64_movups_store addr data) (x64_movups_b_mem addr data))
23252305
(rule 1 (x64_movups_store addr data)
23262306
(if-let true (use_avx))
23272307
(xmm_movrm_vex (AvxOpcode.Vmovups) addr data))
23282308

23292309
(decl x64_movupd_load (SyntheticAmode) Xmm)
2330-
(rule (x64_movupd_load from)
2331-
(xmm_unary_rm_r_unaligned (SseOpcode.Movupd) from))
2310+
(rule (x64_movupd_load from) (x64_movupd_a from))
23322311
(rule 1 (x64_movupd_load from)
23332312
(if-let true (use_avx))
23342313
(xmm_unary_rm_r_vex (AvxOpcode.Vmovupd) from))
23352314

23362315
(decl x64_movupd_store (SyntheticAmode Xmm) SideEffectNoResult)
2337-
(rule (x64_movupd_store addr data)
2338-
(xmm_movrm (SseOpcode.Movupd) addr data))
2316+
(rule (x64_movupd_store addr data) (x64_movupd_b_mem addr data))
23392317
(rule 1 (x64_movupd_store addr data)
23402318
(if-let true (use_avx))
23412319
(xmm_movrm_vex (AvxOpcode.Vmovupd) addr data))
@@ -2369,15 +2347,13 @@
23692347
(xmm_to_gpr_vex (AvxOpcode.Vmovq) from (OperandSize.Size64)))
23702348

23712349
(decl x64_movdqu_load (XmmMem) Xmm)
2372-
(rule (x64_movdqu_load from)
2373-
(xmm_unary_rm_r_unaligned (SseOpcode.Movdqu) from))
2350+
(rule (x64_movdqu_load from) (x64_movdqu_a from))
23742351
(rule 1 (x64_movdqu_load from)
23752352
(if-let true (use_avx))
23762353
(xmm_unary_rm_r_vex (AvxOpcode.Vmovdqu) from))
23772354

23782355
(decl x64_movdqu_store (SyntheticAmode Xmm) SideEffectNoResult)
2379-
(rule (x64_movdqu_store addr data)
2380-
(xmm_movrm (SseOpcode.Movdqu) addr data))
2356+
(rule (x64_movdqu_store addr data) (x64_movdqu_b_mem addr data))
23812357
(rule 1 (x64_movdqu_store addr data)
23822358
(if-let true (use_avx))
23832359
(xmm_movrm_vex (AvxOpcode.Vmovdqu) addr data))
@@ -2436,10 +2412,6 @@
24362412
(let ((size OperandSize (raw_operand_size_of_type ty)))
24372413
(SideEffectNoResult.Inst (MInst.MovImmM size imm addr))))
24382414

2439-
(decl xmm_movrm (SseOpcode SyntheticAmode Xmm) SideEffectNoResult)
2440-
(rule (xmm_movrm op addr data)
2441-
(SideEffectNoResult.Inst (MInst.XmmMovRM op data addr)))
2442-
24432415
(decl xmm_movrm_vex (AvxOpcode SyntheticAmode Xmm) SideEffectNoResult)
24442416
(rule (xmm_movrm_vex op addr data)
24452417
(SideEffectNoResult.Inst (MInst.XmmMovRMVex op data addr)))
@@ -3681,15 +3653,13 @@
36813653
;; when used as a load instruction it wipes out the entire destination register
36823654
;; which defeats the purpose of this being a 2-operand instruction.
36833655
(decl x64_movsd_regmove (Xmm Xmm) Xmm)
3684-
(rule (x64_movsd_regmove src1 src2)
3685-
(xmm_rm_r_unaligned (SseOpcode.Movsd) src1 src2))
3656+
(rule (x64_movsd_regmove src1 src2) (x64_movsd_a_r src1 src2))
36863657
(rule 1 (x64_movsd_regmove src1 src2)
36873658
(if-let true (use_avx))
36883659
(xmm_rmir_vex (AvxOpcode.Vmovsd) src1 src2))
36893660

36903661
(decl x64_movss_regmove (Xmm Xmm) Xmm)
3691-
(rule (x64_movss_regmove src1 src2)
3692-
(xmm_rm_r_unaligned (SseOpcode.Movss) src1 src2))
3662+
(rule (x64_movss_regmove src1 src2) (x64_movss_a_r src1 src2))
36933663
(rule 1 (x64_movss_regmove src1 src2)
36943664
(if-let true (use_avx))
36953665
(xmm_rmir_vex (AvxOpcode.Vmovss) src1 src2))
@@ -5438,6 +5408,7 @@
54385408
(convert WritableXmm WritableReg writable_xmm_to_reg)
54395409
(convert WritableXmm Reg writable_xmm_to_r_reg)
54405410
(convert WritableXmm XmmMem writable_xmm_to_xmm_mem)
5411+
(convert WritableXmm XmmMemAligned writable_xmm_to_xmm_mem_aligned)
54415412
(convert WritableXmm ValueRegs writable_xmm_to_value_regs)
54425413

54435414
;; Note that these conversions will introduce a `movupd` instruction if
@@ -5496,6 +5467,9 @@
54965467
(decl writable_xmm_to_xmm_mem (WritableXmm) XmmMem)
54975468
(rule (writable_xmm_to_xmm_mem w_xmm)
54985469
(xmm_to_xmm_mem (writable_xmm_to_xmm w_xmm)))
5470+
(decl writable_xmm_to_xmm_mem_aligned (WritableXmm) XmmMemAligned)
5471+
(rule (writable_xmm_to_xmm_mem_aligned w_xmm)
5472+
(xmm_to_xmm_mem_aligned (writable_xmm_to_xmm w_xmm)))
54995473
(decl writable_xmm_to_value_regs (WritableXmm) ValueRegs)
55005474
(rule (writable_xmm_to_value_regs w_xmm)
55015475
(value_reg w_xmm))

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -834,15 +834,7 @@ pub enum SseOpcode {
834834
Divss,
835835
Divsd,
836836
Insertps,
837-
Movaps,
838-
Movapd,
839-
Movdqa,
840-
Movdqu,
841837
Movlhps,
842-
Movss,
843-
Movsd,
844-
Movups,
845-
Movupd,
846838
Pabsb,
847839
Pabsw,
848840
Pabsd,
@@ -911,10 +903,7 @@ impl SseOpcode {
911903
| SseOpcode::Cmpss
912904
| SseOpcode::Divps
913905
| SseOpcode::Divss
914-
| SseOpcode::Movaps
915906
| SseOpcode::Movlhps
916-
| SseOpcode::Movss
917-
| SseOpcode::Movups
918907
| SseOpcode::Rcpss
919908
| SseOpcode::Rsqrtss
920909
| SseOpcode::Shufps
@@ -925,11 +914,6 @@ impl SseOpcode {
925914
| SseOpcode::Comisd
926915
| SseOpcode::Divpd
927916
| SseOpcode::Divsd
928-
| SseOpcode::Movapd
929-
| SseOpcode::Movsd
930-
| SseOpcode::Movupd
931-
| SseOpcode::Movdqa
932-
| SseOpcode::Movdqu
933917
| SseOpcode::Packssdw
934918
| SseOpcode::Packsswb
935919
| SseOpcode::Packuswb
@@ -1034,15 +1018,7 @@ impl fmt::Debug for SseOpcode {
10341018
SseOpcode::Divss => "divss",
10351019
SseOpcode::Divsd => "divsd",
10361020
SseOpcode::Insertps => "insertps",
1037-
SseOpcode::Movaps => "movaps",
1038-
SseOpcode::Movapd => "movapd",
1039-
SseOpcode::Movdqa => "movdqa",
1040-
SseOpcode::Movdqu => "movdqu",
10411021
SseOpcode::Movlhps => "movlhps",
1042-
SseOpcode::Movss => "movss",
1043-
SseOpcode::Movsd => "movsd",
1044-
SseOpcode::Movups => "movups",
1045-
SseOpcode::Movupd => "movupd",
10461022
SseOpcode::Pabsb => "pabsb",
10471023
SseOpcode::Pabsw => "pabsw",
10481024
SseOpcode::Pabsd => "pabsd",

0 commit comments

Comments
 (0)