Skip to content

Commit 4a8eaa7

Browse files
authored
cranelift(x64): Swap operands to save AVX instruction encoding size (bytecodealliance#7093)
For `XmmRmiRVex`-format instructions, when the opcode is commutative, the first source operand is one of xmm{0..7}, and the second source operand is one of xmm{8..15}, we can swap the two operands to save one byte in the instruction's encoding.
1 parent 5c1557d commit 4a8eaa7

File tree

3 files changed

+134
-1
lines changed

3 files changed

+134
-1
lines changed

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,12 @@ impl From<RegMem> for RegMemImm {
669669
}
670670
}
671671

672+
impl From<Reg> for RegMemImm {
673+
fn from(reg: Reg) -> Self {
674+
RegMemImm::Reg { reg }
675+
}
676+
}
677+
672678
impl PrettyPrint for RegMemImm {
673679
fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
674680
match self {
@@ -1761,6 +1767,47 @@ impl AvxOpcode {
17611767
}
17621768
}
17631769
}
1770+
1771+
/// Is the opcode known to be commutative?
1772+
///
1773+
/// Note that this method is not exhaustive, and there may be commutative
1774+
/// opcodes that we don't recognize as commutative.
1775+
pub(crate) fn is_commutative(&self) -> bool {
1776+
match *self {
1777+
AvxOpcode::Vpaddb
1778+
| AvxOpcode::Vpaddw
1779+
| AvxOpcode::Vpaddd
1780+
| AvxOpcode::Vpaddq
1781+
| AvxOpcode::Vpaddsb
1782+
| AvxOpcode::Vpaddsw
1783+
| AvxOpcode::Vpaddusb
1784+
| AvxOpcode::Vpaddusw
1785+
| AvxOpcode::Vpand
1786+
| AvxOpcode::Vandps
1787+
| AvxOpcode::Vandpd
1788+
| AvxOpcode::Vpor
1789+
| AvxOpcode::Vorps
1790+
| AvxOpcode::Vorpd
1791+
| AvxOpcode::Vpxor
1792+
| AvxOpcode::Vxorps
1793+
| AvxOpcode::Vxorpd
1794+
| AvxOpcode::Vpmuldq
1795+
| AvxOpcode::Vpmuludq
1796+
| AvxOpcode::Vaddps
1797+
| AvxOpcode::Vaddpd
1798+
| AvxOpcode::Vmulps
1799+
| AvxOpcode::Vmulpd
1800+
| AvxOpcode::Vpcmpeqb
1801+
| AvxOpcode::Vpcmpeqw
1802+
| AvxOpcode::Vpcmpeqd
1803+
| AvxOpcode::Vpcmpeqq
1804+
| AvxOpcode::Vaddss
1805+
| AvxOpcode::Vaddsd
1806+
| AvxOpcode::Vmulss
1807+
| AvxOpcode::Vmulsd => true,
1808+
_ => false,
1809+
}
1810+
}
17641811
}
17651812

17661813
impl fmt::Display for AvxOpcode {

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2285,6 +2285,20 @@ pub(crate) fn emit(
22852285
let src1 = allocs.next(src1.to_reg());
22862286
let src2 = src2.clone().to_reg_mem_imm().with_allocs(allocs);
22872287

2288+
// When the opcode is commutative, src1 is xmm{0..7}, and src2 is
2289+
// xmm{8..15}, then we can swap the operands to save one byte on the
2290+
// instruction's encoding.
2291+
let (src1, src2) = match (src1, src2) {
2292+
(src1, RegMemImm::Reg { reg: src2 })
2293+
if op.is_commutative()
2294+
&& src1.to_real_reg().unwrap().hw_enc() < 8
2295+
&& src2.to_real_reg().unwrap().hw_enc() >= 8 =>
2296+
{
2297+
(src2, RegMemImm::Reg { reg: src1 })
2298+
}
2299+
(src1, src2) => (src1, src2),
2300+
};
2301+
22882302
let src2 = match src2 {
22892303
// For opcodes where one of the operands is an immediate the
22902304
// encoding is a bit different, notably the usage of
@@ -2319,6 +2333,7 @@ pub(crate) fn emit(
23192333
}
23202334
RegMemImm::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
23212335
};
2336+
23222337
let (prefix, map, opcode) = match op {
23232338
AvxOpcode::Vminps => (LP::None, OM::_0F, 0x5D),
23242339
AvxOpcode::Vminpd => (LP::_66, OM::_0F, 0x5D),

cranelift/codegen/src/isa/x64/inst/emit_tests.rs

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
//! -- isa::x64::inst::emit_tests::test_x64_emit
1414
1515
use super::*;
16-
use crate::ir::UserExternalNameRef;
16+
use crate::ir::{MemFlags, UserExternalNameRef};
1717
use crate::isa::x64;
1818
use alloc::boxed::Box;
1919
use alloc::vec::Vec;
@@ -5159,6 +5159,77 @@ fn test_x64_emit() {
51595159
"roundpd $0, %xmm15, %xmm15",
51605160
));
51615161

5162+
// ========================================================
5163+
// XmmRmiRVex
5164+
5165+
// Standard instruction w/ XmmMemImm::Reg operand.
5166+
insns.push((
5167+
Inst::XmmRmiRVex {
5168+
op: AvxOpcode::Vpmaxub,
5169+
dst: Writable::from_reg(Xmm::new(xmm13).unwrap()),
5170+
src1: Xmm::new(xmm1).unwrap(),
5171+
src2: XmmMemImm::new(xmm12.into()).unwrap(),
5172+
},
5173+
"C44171DEEC",
5174+
"vpmaxub %xmm1, %xmm12, %xmm13",
5175+
));
5176+
5177+
// Standard instruction w/ XmmMemImm::Mem operand.
5178+
insns.push((
5179+
Inst::XmmRmiRVex {
5180+
op: AvxOpcode::Vpmaxub,
5181+
dst: Writable::from_reg(Xmm::new(xmm13).unwrap()),
5182+
src1: Xmm::new(xmm1).unwrap(),
5183+
src2: XmmMemImm::new(RegMemImm::Mem {
5184+
addr: Amode::ImmReg {
5185+
simm32: 10,
5186+
base: rax,
5187+
flags: MemFlags::trusted(),
5188+
}
5189+
.into(),
5190+
})
5191+
.unwrap(),
5192+
},
5193+
"C571DE680A",
5194+
"vpmaxub %xmm1, 10(%rax), %xmm13",
5195+
));
5196+
5197+
// When there's an immediate.
5198+
insns.push((
5199+
Inst::XmmRmiRVex {
5200+
op: AvxOpcode::Vpsrlw,
5201+
dst: Writable::from_reg(Xmm::new(xmm13).unwrap()),
5202+
src1: Xmm::new(xmm1).unwrap(),
5203+
src2: XmmMemImm::new(RegMemImm::Imm { simm32: 36 }).unwrap(),
5204+
},
5205+
"C59171D124",
5206+
"vpsrlw %xmm1, $36, %xmm13",
5207+
));
5208+
5209+
// Certain commutative ops get their operands swapped to avoid relying on an
5210+
// extra prefix byte, when possible. Note that these two instructions encode
5211+
// to the same bytes, and are 4-byte encodings rather than 5-byte encodings.
5212+
insns.push((
5213+
Inst::XmmRmiRVex {
5214+
op: AvxOpcode::Vmulsd,
5215+
dst: Writable::from_reg(Xmm::new(xmm13).unwrap()),
5216+
src1: Xmm::new(xmm1).unwrap(),
5217+
src2: XmmMemImm::new(xmm12.into()).unwrap(),
5218+
},
5219+
"C51B59E9",
5220+
"vmulsd %xmm1, %xmm12, %xmm13",
5221+
));
5222+
insns.push((
5223+
Inst::XmmRmiRVex {
5224+
op: AvxOpcode::Vmulsd,
5225+
dst: Writable::from_reg(Xmm::new(xmm13).unwrap()),
5226+
src1: Xmm::new(xmm12).unwrap(),
5227+
src2: XmmMemImm::new(xmm1.into()).unwrap(),
5228+
},
5229+
"C51B59E9",
5230+
"vmulsd %xmm12, %xmm1, %xmm13",
5231+
));
5232+
51625233
// ========================================================
51635234
// XmmRmRImmVex
51645235
insns.push((

0 commit comments

Comments
 (0)