Skip to content

Commit 2473041

Browse files
alexcrichtonabrown
andauthored
x64: Migrate SignExtendData to new assembler (bytecodealliance#10814)
* x64: Migrate `SignExtendData` to new assembler I woke up early today and figure how better to start the day than to frob some assembler instructions. I'd never seen the `SignExtendData` before and it was trickier and more subtle than I thought. The end result of this PR is to remove this pseudo-instruction and replace it with the per-instruction equivalents that it can generate. The first thing surprising about this instruction is that the Capstone and Intel names disagree. For example what Intel calls `CBW` Capstone calls `cbtw`. This is true for all the various instructions here so the names are all changing subtly. The second surprising thing was that the behavior of the instruction depended on the operand size. This instruction was tasked with sign-extending data just before a `div` instruction which requires the operand to be in `AX` (sign-extended) for 8-bit division, but otherwise requires `DX:AX` or wider for wider division. That meant that the 8-bit version of `SignExtendData` would read `AL` and write `AX`, but the wider variants would read `AX` and write `DX` for example. This involved writing a specialized helper in ISLE for handling the 16-bits-or-higher width and otherwise customizing each of the various cases for 8-bit or wider already in ISLE. Winch received appropriate minor updates as well, and in the end we've now got a few more instructions bound which Cranelift doesn't currently used but should be easier to do so in the future. * Update cranelift/assembler-x64/meta/src/instructions/bitmanip.rs Co-authored-by: Andrew Brown <[email protected]> --------- Co-authored-by: Andrew Brown <[email protected]>
1 parent 446d650 commit 2473041

File tree

12 files changed

+77
-116
lines changed

12 files changed

+77
-116
lines changed

cranelift/assembler-x64/meta/src/generate/format.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ impl dsl::Format {
9494
let bits = "w_bit, uses_8bit";
9595

9696
match self.operands_by_kind().as_slice() {
97-
[FixedReg(dst), Imm(_)] => {
97+
[FixedReg(dst), FixedReg(_)] | [FixedReg(dst)] | [FixedReg(dst), Imm(_)] => {
9898
// TODO: don't emit REX byte here.
9999
assert_eq!(rex.digit, None);
100100
fmtln!(f, "let digit = 0;");
@@ -148,7 +148,7 @@ impl dsl::Format {
148148
}
149149

150150
match self.operands_by_kind().as_slice() {
151-
[FixedReg(_), Imm(_)] => {
151+
[FixedReg(_)] | [FixedReg(_), FixedReg(_)] | [FixedReg(_), Imm(_)] => {
152152
// No need to emit a ModRM byte: we know the register used.
153153
}
154154
[Reg(reg), Imm(_)] => {

cranelift/assembler-x64/meta/src/instructions/bitmanip.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::dsl::{Feature::*, Inst, Location::*};
2-
use crate::dsl::{fmt, inst, r, rex, w};
2+
use crate::dsl::{fmt, implicit, inst, r, rex, rw, w};
33

44
#[rustfmt::skip] // Keeps instructions on a single line.
55
pub fn list() -> Vec<Inst> {
@@ -23,5 +23,21 @@ pub fn list() -> Vec<Inst> {
2323
inst("popcntw", fmt("RM", [w(r16), r(rm16)]), rex([0x66, 0xF3, 0x0F, 0xB8]).r(), _64b | compat | popcnt),
2424
inst("popcntl", fmt("RM", [w(r32), r(rm32)]), rex([0xF3, 0x0F, 0xB8]).r(), _64b | compat | popcnt),
2525
inst("popcntq", fmt("RM", [w(r64), r(rm64)]), rex([0xF3, 0x0F, 0xB8]).r().w(), _64b | popcnt),
26+
27+
// Note that the Intel manual calls has different names for these
28+
// instructions than Capstone gives them:
29+
//
30+
// * cbtw => cbw
31+
// * cwtl => cwde
32+
// * cltq => cwqe
33+
// * cwtd => cwd
34+
// * cltd => cdq
35+
// * cqto => cqo
36+
inst("cbtw", fmt("ZO", [rw(implicit(ax))]), rex([0x66, 0x98]), _64b | compat),
37+
inst("cwtl", fmt("ZO", [rw(implicit(eax))]), rex([0x98]), _64b | compat),
38+
inst("cltq", fmt("ZO", [rw(implicit(rax))]), rex([0x98]).w(), _64b),
39+
inst("cwtd", fmt("ZO", [w(implicit(dx)), r(implicit(ax))]), rex([0x66, 0x99]), _64b | compat),
40+
inst("cltd", fmt("ZO", [w(implicit(edx)), r(implicit(eax))]), rex([0x99]), _64b | compat),
41+
inst("cqto", fmt("ZO", [w(implicit(rdx)), r(implicit(rax))]), rex([0x99]).w(), _64b),
2642
]
2743
}

cranelift/assembler-x64/src/fuzz.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ pub fn roundtrip(inst: &Inst<FuzzRegs>) {
2727
// off the instruction offset first.
2828
let expected = expected.split_once(' ').unwrap().1;
2929
let actual = inst.to_string();
30-
if expected != actual && expected != fix_up(&actual) {
30+
if expected != actual && expected.trim() != fix_up(&actual) {
3131
println!("> {inst}");
3232
println!(" debug: {inst:x?}");
3333
println!(" assembled: {}", pretty_print_hexadecimal(&assembled));

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,6 @@
8989
(divisor Gpr)
9090
(dst WritableGpr))
9191

92-
;; Do a sign-extend based on the sign of the value in rax into rdx: (cwd
93-
;; cdq cqo) or al into ah: (cbw)
94-
(SignExtendData (size OperandSize) ;; 1, 2, 4, or 8
95-
(src Gpr)
96-
(dst WritableGpr))
97-
9892
;; Constant materialization: (imm32 imm64) reg.
9993
;;
10094
;; Either: movl $imm32, %reg32 or movabsq $imm64, %reg32.
@@ -5535,13 +5529,6 @@
55355529
(rule (x64_div_remainder dividend_lo dividend_hi divisor size sign trap)
55365530
(value_regs_get (x64_div dividend_lo dividend_hi divisor size sign trap) 1))
55375531

5538-
;; Helper for creating `SignExtendData` instructions
5539-
(decl x64_sign_extend_data (Gpr OperandSize) Gpr)
5540-
(rule (x64_sign_extend_data src size)
5541-
(let ((dst WritableGpr (temp_writable_gpr))
5542-
(_ Unit (emit (MInst.SignExtendData size src dst))))
5543-
dst))
5544-
55455532
;;;; Pinned Register ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
55465533

55475534
(decl read_pinned_gpr () Gpr)

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -369,32 +369,6 @@ pub(crate) fn emit(
369369
.encode(sink);
370370
}
371371

372-
Inst::SignExtendData { size, src, dst } => {
373-
let src = src.to_reg();
374-
let dst = dst.to_reg().to_reg();
375-
debug_assert_eq!(src, regs::rax());
376-
if *size == OperandSize::Size8 {
377-
debug_assert_eq!(dst, regs::rax());
378-
} else {
379-
debug_assert_eq!(dst, regs::rdx());
380-
}
381-
match size {
382-
OperandSize::Size8 => {
383-
sink.put1(0x66);
384-
sink.put1(0x98);
385-
}
386-
OperandSize::Size16 => {
387-
sink.put1(0x66);
388-
sink.put1(0x99);
389-
}
390-
OperandSize::Size32 => sink.put1(0x99),
391-
OperandSize::Size64 => {
392-
sink.put1(0x48);
393-
sink.put1(0x99);
394-
}
395-
}
396-
}
397-
398372
Inst::CheckedSRemSeq { divisor, .. } | Inst::CheckedSRemSeq8 { divisor, .. } => {
399373
let divisor = divisor.to_reg();
400374

cranelift/codegen/src/isa/x64/inst/mod.rs

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ impl Inst {
122122
| Inst::Ret { .. }
123123
| Inst::Setcc { .. }
124124
| Inst::ShiftR { .. }
125-
| Inst::SignExtendData { .. }
126125
| Inst::StackSwitchBasic { .. }
127126
| Inst::TrapIf { .. }
128127
| Inst::TrapIfAnd { .. }
@@ -798,18 +797,6 @@ impl PrettyPrint for Inst {
798797
format!("checked_srem_seq {dividend}, {divisor}, {dst}")
799798
}
800799

801-
Inst::SignExtendData { size, src, dst } => {
802-
let src = pretty_print_reg(src.to_reg(), size.to_bytes());
803-
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes());
804-
let op = match size {
805-
OperandSize::Size8 => "cbw",
806-
OperandSize::Size16 => "cwd",
807-
OperandSize::Size32 => "cdq",
808-
OperandSize::Size64 => "cqo",
809-
};
810-
format!("{op} {src}, {dst}")
811-
}
812-
813800
Inst::XmmUnaryRmR { op, src, dst, .. } => {
814801
let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size());
815802
let src = src.pretty_print(op.src_size());
@@ -1931,22 +1918,6 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
19311918
collector.reg_fixed_use(src1, regs::rdx());
19321919
src2.get_operands(collector);
19331920
}
1934-
Inst::SignExtendData { size, src, dst } => {
1935-
match size {
1936-
OperandSize::Size8 => {
1937-
// Note `rax` on both src and dest: 8->16 extend
1938-
// does AL -> AX.
1939-
collector.reg_fixed_use(src, regs::rax());
1940-
collector.reg_fixed_def(dst, regs::rax());
1941-
}
1942-
_ => {
1943-
// All other widths do RAX -> RDX (AX -> DX:AX,
1944-
// EAX -> EDX:EAX).
1945-
collector.reg_fixed_use(src, regs::rax());
1946-
collector.reg_fixed_def(dst, regs::rdx());
1947-
}
1948-
}
1949-
}
19501921
Inst::UnaryRmRVex { src, dst, .. } | Inst::UnaryRmRImmVex { src, dst, .. } => {
19511922
collector.reg_def(dst);
19521923
src.get_operands(collector);

cranelift/codegen/src/isa/x64/lower.isle

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4409,23 +4409,31 @@
44094409
;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
44104410

44114411
(rule 2 (lower (sdiv a @ (value_type $I8) b))
4412-
(x64_div8 (x64_sign_extend_data a (OperandSize.Size8))
4412+
(x64_div8 (x64_cbtw_zo a)
44134413
(nonzero_sdiv_divisor $I8 b)
44144414
(DivSignedness.Signed)
44154415
(TrapCode.INTEGER_OVERFLOW)))
44164416

44174417
(rule 1 (lower (sdiv a @ (value_type (fits_in_64 ty)) b))
4418-
(let (
4419-
(a Gpr a)
4420-
(size OperandSize (raw_operand_size_of_type ty))
4421-
)
4418+
(let ((a Gpr a))
44224419
(x64_div_quotient a
4423-
(x64_sign_extend_data a size)
4420+
(repeat_sign_bit ty a)
44244421
(nonzero_sdiv_divisor ty b)
4425-
size
4422+
(raw_operand_size_of_type ty)
44264423
(DivSignedness.Signed)
44274424
(TrapCode.INTEGER_OVERFLOW))))
44284425

4426+
;; Repeats the sign bit in the provided gpr, which will register-allocate to
4427+
;; %rax, into a destination gpr which will register-allocate to %rdx.
4428+
;;
4429+
;; This is intended to be used before x64 `div` instructions where
4430+
;; left-hand-side (divisor? dividend? I always forget) is double-wide and
4431+
;; present across the rax/rdx registers (sized to the operation in question).
4432+
(decl repeat_sign_bit (Type Gpr) Gpr)
4433+
(rule (repeat_sign_bit $I16 src) (x64_cwtd_zo src))
4434+
(rule (repeat_sign_bit $I32 src) (x64_cltd_zo src))
4435+
(rule (repeat_sign_bit $I64 src) (x64_cqto_zo src))
4436+
44294437
;; Checks to make sure that the input `Value` is a non-zero value for `sdiv`.
44304438
;;
44314439
;; This is required to differentiate the divide-by-zero trap from the
@@ -4474,7 +4482,7 @@
44744482
(rule 3 (lower (srem a @ (value_type $I8) (iconst imm)))
44754483
(if-let n (safe_divisor_from_imm64 $I8 imm))
44764484
(let (
4477-
(a Gpr (x64_sign_extend_data a (OperandSize.Size8)))
4485+
(a Gpr (x64_cbtw_zo a))
44784486
(result Gpr (x64_div8 a (imm $I8 n) (DivSignedness.Signed) (TrapCode.INTEGER_DIVISION_BY_ZERO)))
44794487
)
44804488
(x64_shr $I64 result (Imm8Reg.Imm8 8))))
@@ -4487,23 +4495,20 @@
44874495
(size OperandSize (raw_operand_size_of_type ty))
44884496
)
44894497
(x64_div_remainder a
4490-
(x64_sign_extend_data a size)
4498+
(repeat_sign_bit ty a)
44914499
(imm ty n)
44924500
size
44934501
(DivSignedness.Signed)
44944502
(TrapCode.INTEGER_DIVISION_BY_ZERO))))
44954503

44964504
(rule 1 (lower (srem a @ (value_type $I8) b))
4497-
(let (
4498-
(a Gpr (x64_sign_extend_data a (OperandSize.Size8)))
4499-
)
4500-
(x64_shr $I64 (x64_checked_srem_seq8 a b) (Imm8Reg.Imm8 8))))
4505+
(x64_shr $I64 (x64_checked_srem_seq8 (x64_cbtw_zo a) b) (Imm8Reg.Imm8 8)))
45014506

45024507
(rule (lower (srem a @ (value_type ty) b))
45034508
(let (
45044509
(a Gpr a)
45054510
(size OperandSize (raw_operand_size_of_type ty))
4506-
(hi Gpr (x64_sign_extend_data a size))
4511+
(hi Gpr (repeat_sign_bit ty a))
45074512
(tmp ValueRegs (x64_checked_srem_seq size a hi b))
45084513
)
45094514
(value_regs_get tmp 1)))

cranelift/codegen/src/isa/x64/pcc.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,6 @@ pub(crate) fn check(
152152

153153
Inst::CheckedSRemSeq8 { dst, .. } => undefined_result(ctx, vcode, dst, 64, 64),
154154

155-
Inst::SignExtendData { dst, .. } => undefined_result(ctx, vcode, dst, 64, 64),
156-
157155
Inst::Imm { simm64, dst, .. } => {
158156
check_output(ctx, vcode, dst.to_writable_reg(), &[], |_vcode| {
159157
Ok(Some(Fact::constant(64, simm64)))

cranelift/filetests/filetests/isa/x64/div-checks.clif

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ block0(v0: i8, v1: i8):
1717
; movq %rsp, %rbp
1818
; block0:
1919
; movq %rdi, %rax
20-
; cbw %al, %al
20+
; cbtw ;; implicit: %ax
2121
; checked_srem_seq %al, %sil, %al
2222
; shrq $8, %rax, %rax
2323
; movq %rbp, %rsp
@@ -53,7 +53,7 @@ block0(v0: i16, v1: i16):
5353
; movq %rsp, %rbp
5454
; block0:
5555
; movq %rdi, %rax
56-
; cwd %ax, %dx
56+
; cwtd ;; implicit: %dx, %ax
5757
; checked_srem_seq %ax, %dx, %si, %ax, %dx
5858
; movq %rdx, %rax
5959
; movq %rbp, %rsp
@@ -89,7 +89,7 @@ block0(v0: i32, v1: i32):
8989
; movq %rsp, %rbp
9090
; block0:
9191
; movq %rdi, %rax
92-
; cdq %eax, %edx
92+
; cltd ;; implicit: %edx, %eax
9393
; checked_srem_seq %eax, %edx, %esi, %eax, %edx
9494
; movq %rdx, %rax
9595
; movq %rbp, %rsp
@@ -125,7 +125,7 @@ block0(v0: i64, v1: i64):
125125
; movq %rsp, %rbp
126126
; block0:
127127
; movq %rdi, %rax
128-
; cqo %rax, %rdx
128+
; cqto ;; implicit: %rdx, %rax
129129
; checked_srem_seq %rax, %rdx, %rsi, %rax, %rdx
130130
; movq %rdx, %rax
131131
; movq %rbp, %rsp

cranelift/filetests/filetests/isa/x64/sdiv.clif

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ block0(v0: i8, v1: i8):
1212
; movq %rsp, %rbp
1313
; block0:
1414
; movq %rdi, %rax
15-
; cbw %al, %al
15+
; cbtw ;; implicit: %ax
1616
; testb %sil, %sil
1717
; jz #trap=int_divz
1818
; idiv %al, %sil, %al ; trap=int_ovf
@@ -46,7 +46,7 @@ block0(v0: i16, v1: i16):
4646
; movq %rsp, %rbp
4747
; block0:
4848
; movq %rdi, %rax
49-
; cwd %ax, %dx
49+
; cwtd ;; implicit: %dx, %ax
5050
; testw %si, %si
5151
; jz #trap=int_divz
5252
; idiv %ax, %dx, %si, %ax, %dx ; trap=int_ovf
@@ -80,7 +80,7 @@ block0(v0: i32, v1: i32):
8080
; movq %rsp, %rbp
8181
; block0:
8282
; movq %rdi, %rax
83-
; cdq %eax, %edx
83+
; cltd ;; implicit: %edx, %eax
8484
; testl %esi, %esi
8585
; jz #trap=int_divz
8686
; idiv %eax, %edx, %esi, %eax, %edx ; trap=int_ovf
@@ -114,7 +114,7 @@ block0(v0: i64, v1: i64):
114114
; movq %rsp, %rbp
115115
; block0:
116116
; movq %rdi, %rax
117-
; cqo %rax, %rdx
117+
; cqto ;; implicit: %rdx, %rax
118118
; testq %rsi, %rsi
119119
; jz #trap=int_divz
120120
; idiv %rax, %rdx, %rsi, %rax, %rdx ; trap=int_ovf

0 commit comments

Comments
 (0)