Skip to content

Commit ee9c795

Browse files
authored
x64: Migrate ShiftR pseudo-instruction to the new assembler (#10816)
* x64: Migrate `ShiftR` pseudo-instruction to the new assembler. Shifts and rotates were all represented with `ShiftR`; they are now each represented by individual instructions. Additionally, the old `Imm8{Gpr,Reg}` abstractions were largely removed and/or replaced so that they only remain as helpers in ISLE. This regresses pcc a bit further than it had already regressed, because the shift instructions are not special-cased for pcc processing. * Remove emit tests
1 parent 6abe3c4 commit ee9c795

File tree

31 files changed

+417
-1011
lines changed

31 files changed

+417
-1011
lines changed

cranelift/assembler-x64/meta/src/generate/format.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ impl dsl::Format {
107107
fmtln!(f, "let dst = self.{dst}.enc();");
108108
fmtln!(f, "let rex = RexPrefix::two_op(digit, dst, {bits});");
109109
}
110-
[FixedReg(_), RegMem(mem)] | [FixedReg(_), FixedReg(_), RegMem(mem)] => {
110+
[FixedReg(_), RegMem(mem)]
111+
| [FixedReg(_), FixedReg(_), RegMem(mem)]
112+
| [RegMem(mem), FixedReg(_)] => {
111113
let digit = rex.digit.unwrap();
112114
fmtln!(f, "let digit = 0x{digit:x};");
113115
fmtln!(f, "let rex = self.{mem}.as_rex_prefix(digit, {bits});");
@@ -160,6 +162,7 @@ impl dsl::Format {
160162
| [RegMem(mem), Imm(_)]
161163
| [RegMem(mem)]
162164
| [FixedReg(_), RegMem(mem)]
165+
| [RegMem(mem), FixedReg(_)]
163166
| [FixedReg(_), FixedReg(_), RegMem(mem)] => {
164167
let digit = rex.digit.unwrap();
165168
fmtln!(f, "let digit = 0x{digit:x};");

cranelift/assembler-x64/meta/src/instructions/shift.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,53 @@ use crate::dsl::{align, fmt, inst, r, rex, rw};
44
#[rustfmt::skip] // Keeps instructions on a single line.
55
pub fn list() -> Vec<Inst> {
66
vec![
7+
// Note that the "M1" format of instructions is omitted here at this
8+
// time. Cranelift doesn't currently emit them and matching up the
9+
// disassembly with Capstone is nontrivial since Capstone disassembles
10+
// the "1" immediate despite it not actually being part of the
11+
// instruction encoding, so the lack of an operand here means that this
12+
// assembler wouldn't emit it leading to differences.
13+
inst("sarb", fmt("MC", [rw(rm8), r(cl)]), rex([0xD2]).digit(7), _64b | compat),
14+
inst("sarb", fmt("MI", [rw(rm8), r(imm8)]), rex([0xC0]).digit(7).ib(), _64b | compat),
15+
inst("sarw", fmt("MC", [rw(rm16), r(cl)]), rex([0x66, 0xD3]).digit(7), _64b | compat),
16+
inst("sarw", fmt("MI", [rw(rm16), r(imm8)]), rex([0x66, 0xC1]).digit(7).ib(), _64b | compat),
17+
inst("sarl", fmt("MC", [rw(rm32), r(cl)]), rex([0xD3]).digit(7), _64b | compat),
18+
inst("sarl", fmt("MI", [rw(rm32), r(imm8)]), rex([0xC1]).digit(7).ib(), _64b | compat),
19+
inst("sarq", fmt("MC", [rw(rm64), r(cl)]), rex([0xD3]).digit(7).w(), _64b),
20+
inst("sarq", fmt("MI", [rw(rm64), r(imm8)]), rex([0xC1]).digit(7).ib().w(), _64b),
21+
inst("shlb", fmt("MC", [rw(rm8), r(cl)]), rex([0xD2]).digit(4), _64b | compat),
22+
inst("shlb", fmt("MI", [rw(rm8), r(imm8)]), rex([0xC0]).digit(4).ib(), _64b | compat),
23+
inst("shlw", fmt("MC", [rw(rm16), r(cl)]), rex([0x66, 0xD3]).digit(4), _64b | compat),
24+
inst("shlw", fmt("MI", [rw(rm16), r(imm8)]), rex([0x66, 0xC1]).digit(4).ib(), _64b | compat),
25+
inst("shll", fmt("MC", [rw(rm32), r(cl)]), rex([0xD3]).digit(4), _64b | compat),
26+
inst("shll", fmt("MI", [rw(rm32), r(imm8)]), rex([0xC1]).digit(4).ib(), _64b | compat),
27+
inst("shlq", fmt("MC", [rw(rm64), r(cl)]), rex([0xD3]).digit(4).w(), _64b),
28+
inst("shlq", fmt("MI", [rw(rm64), r(imm8)]), rex([0xC1]).digit(4).ib().w(), _64b),
29+
inst("shrb", fmt("MC", [rw(rm8), r(cl)]), rex([0xD2]).digit(5), _64b | compat),
30+
inst("shrb", fmt("MI", [rw(rm8), r(imm8)]), rex([0xC0]).digit(5).ib(), _64b | compat),
31+
inst("shrw", fmt("MC", [rw(rm16), r(cl)]), rex([0x66, 0xD3]).digit(5), _64b | compat),
32+
inst("shrw", fmt("MI", [rw(rm16), r(imm8)]), rex([0x66, 0xC1]).digit(5).ib(), _64b | compat),
33+
inst("shrl", fmt("MC", [rw(rm32), r(cl)]), rex([0xD3]).digit(5), _64b | compat),
34+
inst("shrl", fmt("MI", [rw(rm32), r(imm8)]), rex([0xC1]).digit(5).ib(), _64b | compat),
35+
inst("shrq", fmt("MC", [rw(rm64), r(cl)]), rex([0xD3]).digit(5).w(), _64b),
36+
inst("shrq", fmt("MI", [rw(rm64), r(imm8)]), rex([0xC1]).digit(5).ib().w(), _64b),
37+
inst("rolb", fmt("MC", [rw(rm8), r(cl)]), rex([0xD2]).digit(0), _64b | compat),
38+
inst("rolb", fmt("MI", [rw(rm8), r(imm8)]), rex([0xC0]).digit(0).ib(), _64b | compat),
39+
inst("rolw", fmt("MC", [rw(rm16), r(cl)]), rex([0x66, 0xD3]).digit(0), _64b | compat),
40+
inst("rolw", fmt("MI", [rw(rm16), r(imm8)]), rex([0x66, 0xC1]).digit(0).ib(), _64b | compat),
41+
inst("roll", fmt("MC", [rw(rm32), r(cl)]), rex([0xD3]).digit(0), _64b | compat),
42+
inst("roll", fmt("MI", [rw(rm32), r(imm8)]), rex([0xC1]).digit(0).ib(), _64b | compat),
43+
inst("rolq", fmt("MC", [rw(rm64), r(cl)]), rex([0xD3]).digit(0).w(), _64b | compat),
44+
inst("rolq", fmt("MI", [rw(rm64), r(imm8)]), rex([0xC1]).digit(0).ib().w(), _64b | compat),
45+
inst("rorb", fmt("MC", [rw(rm8), r(cl)]), rex([0xD2]).digit(1), _64b | compat),
46+
inst("rorb", fmt("MI", [rw(rm8), r(imm8)]), rex([0xC0]).digit(1).ib(), _64b | compat),
47+
inst("rorw", fmt("MC", [rw(rm16), r(cl)]), rex([0x66, 0xD3]).digit(1), _64b | compat),
48+
inst("rorw", fmt("MI", [rw(rm16), r(imm8)]), rex([0x66, 0xC1]).digit(1).ib(), _64b | compat),
49+
inst("rorl", fmt("MC", [rw(rm32), r(cl)]), rex([0xD3]).digit(1), _64b | compat),
50+
inst("rorl", fmt("MI", [rw(rm32), r(imm8)]), rex([0xC1]).digit(1).ib(), _64b | compat),
51+
inst("rorq", fmt("MC", [rw(rm64), r(cl)]), rex([0xD3]).digit(1).w(), _64b | compat),
52+
inst("rorq", fmt("MI", [rw(rm64), r(imm8)]), rex([0xC1]).digit(1).ib().w(), _64b | compat),
53+
754
inst("shldw", fmt("MRI", [rw(rm16), r(r16), r(imm8)]), rex([0x66, 0x0F, 0xA4]).ib(), _64b | compat),
855
inst("shldw", fmt("MRC", [rw(rm16), r(r16), r(cl)]), rex([0x66, 0x0F, 0xA5]).ib(), _64b | compat),
956
inst("shldl", fmt("MRI", [rw(rm32), r(r32), r(imm8)]), rex([0x0F, 0xA4]).ib(), _64b | compat),

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 49 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,6 @@
145145
(src Gpr)
146146
(dst SyntheticAmode))
147147

148-
;; Arithmetic shifts: (shl shr sar) (b w l q) imm reg.
149-
(ShiftR (size OperandSize) ;; 1, 2, 4, or 8
150-
(kind ShiftKind)
151-
(src Gpr)
152-
;; shift count: `Imm8Gpr::Imm8(0 .. #bits-in-type - 1)` or
153-
;; `Imm8Reg::Gpr(r)` where `r` gets move mitosis'd into `%cl`.
154-
(num_bits Imm8Gpr)
155-
(dst WritableGpr))
156-
157148
;; Integer comparisons/tests: cmp or test (b w l q) (reg addr imm) reg.
158149
(CmpRmiR (size OperandSize) ;; 1, 2, 4, or 8
159150
(opcode CmpOpcode)
@@ -1181,9 +1172,9 @@
11811172
RotateLeft
11821173
RotateRight))
11831174

1184-
(type Imm8Reg extern
1175+
(type Imm8Gpr
11851176
(enum (Imm8 (imm u8))
1186-
(Reg (reg Reg))))
1177+
(Gpr (reg Gpr))))
11871178

11881179
;; Put the given clif value into an `Imm8Gpr` operand, masked to the bit width of
11891180
;; the given type.
@@ -1196,7 +1187,7 @@
11961187
;; This is used when lowering various shifts and rotates.
11971188
(decl put_masked_in_imm8_gpr (Value Type) Imm8Gpr)
11981189
(rule 2 (put_masked_in_imm8_gpr (u64_from_iconst amt) ty)
1199-
(const_to_type_masked_imm8 amt ty))
1190+
(Imm8Gpr.Imm8 (u64_as_u8 (u64_and amt (shift_mask ty)))))
12001191
(rule 1 (put_masked_in_imm8_gpr amt (fits_in_16 ty))
12011192
(x64_and $I64 (value_regs_get_gpr amt 0) (RegMemImm.Imm (shift_mask ty))))
12021193
(rule (put_masked_in_imm8_gpr amt ty)
@@ -1480,7 +1471,6 @@
14801471
(type OptionWritableGpr (primitive OptionWritableGpr))
14811472
(type GprMem extern (enum))
14821473
(type GprMemImm extern (enum))
1483-
(type Imm8Gpr extern (enum))
14841474

14851475
(type Xmm (primitive Xmm))
14861476
(type WritableXmm (primitive WritableXmm))
@@ -1490,18 +1480,6 @@
14901480
(type XmmMemImm extern (enum))
14911481
(type XmmMemAlignedImm extern (enum))
14921482

1493-
;; Convert an `Imm8Reg` into an `Imm8Gpr`.
1494-
(decl imm8_reg_to_imm8_gpr (Imm8Reg) Imm8Gpr)
1495-
(extern constructor imm8_reg_to_imm8_gpr imm8_reg_to_imm8_gpr)
1496-
1497-
;; Convert an `Imm8Gpr` into a `Gpr`.
1498-
(decl gpr_from_imm8_gpr (Gpr) Imm8Gpr)
1499-
(extern extractor gpr_from_imm8_gpr gpr_from_imm8_gpr)
1500-
1501-
;; Convert an `Imm8Gpr` into an `Imm8`.
1502-
(decl imm8_from_imm8_gpr (u8) Imm8Gpr)
1503-
(extern extractor imm8_from_imm8_gpr imm8_from_imm8_gpr)
1504-
15051483
;; Convert a `WritableGpr` to a `WritableReg`.
15061484
(decl writable_gpr_to_reg (WritableGpr) WritableReg)
15071485
(extern constructor writable_gpr_to_reg writable_gpr_to_reg)
@@ -1717,11 +1695,7 @@
17171695

17181696
;; Convert a `Gpr` to an `Imm8Gpr`.
17191697
(decl gpr_to_imm8_gpr (Gpr) Imm8Gpr)
1720-
(extern constructor gpr_to_imm8_gpr gpr_to_imm8_gpr)
1721-
1722-
;; Convert an 8-bit immediate into an `Imm8Gpr`.
1723-
(decl imm8_to_imm8_gpr (u8) Imm8Gpr)
1724-
(extern constructor imm8_to_imm8_gpr imm8_to_imm8_gpr)
1698+
(rule (gpr_to_imm8_gpr gpr) (Imm8Gpr.Gpr gpr))
17251699

17261700
;; Get the low half of the given `Value` as a GPR.
17271701
(decl lo_gpr (Value) Gpr)
@@ -1837,17 +1811,6 @@
18371811

18381812
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
18391813

1840-
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
1841-
(decl imm8_from_value (Imm8Reg) Value)
1842-
(extern extractor imm8_from_value imm8_from_value)
1843-
1844-
;; Mask a constant to the bit-width of the given type and package it into an
1845-
;; `Imm8Reg.Imm8`. This is used for shifts and rotates, so that we don't try and
1846-
;; shift/rotate more bits than the type has available, per Cranelift's
1847-
;; semantics.
1848-
(decl const_to_type_masked_imm8 (u64 Type) Imm8Gpr)
1849-
(extern constructor const_to_type_masked_imm8 const_to_type_masked_imm8)
1850-
18511814
;; Generate a mask for the bit-width of the given type
18521815
(decl shift_mask (Type) u8)
18531816
(extern constructor shift_mask shift_mask)
@@ -3071,59 +3034,79 @@
30713034
(let ((tmp Xmm (xmm_uninit_value)))
30723035
(x64_xor_vector ty tmp tmp)))
30733036

3074-
;; Helper for creating `MInst.ShiftR` instructions.
3075-
(decl shift_r (Type ShiftKind Gpr Imm8Gpr) Gpr)
3076-
(rule (shift_r ty kind src1 src2)
3077-
(let ((dst WritableGpr (temp_writable_gpr))
3078-
;; Use actual 8/16-bit instructions when appropriate: we
3079-
;; rely on their shift-amount-masking semantics.
3080-
(size OperandSize (raw_operand_size_of_type ty))
3081-
(_ Unit (emit (MInst.ShiftR size kind src1 src2 dst))))
3082-
dst))
3083-
30843037
;; Helper for creating `rotl` instructions.
30853038
(decl x64_rotl (Type Gpr Imm8Gpr) Gpr)
3086-
(rule (x64_rotl ty src1 src2)
3087-
(shift_r ty (ShiftKind.RotateLeft) src1 src2))
3088-
(rule 1 (x64_rotl (ty_32_or_64 ty) src (imm8_from_imm8_gpr imm))
3039+
(rule (x64_rotl $I8 src1 (Imm8Gpr.Gpr src2)) (x64_rolb_mc src1 src2))
3040+
(rule (x64_rotl $I8 src1 (Imm8Gpr.Imm8 src2)) (x64_rolb_mi src1 src2))
3041+
(rule (x64_rotl $I16 src1 (Imm8Gpr.Gpr src2)) (x64_rolw_mc src1 src2))
3042+
(rule (x64_rotl $I16 src1 (Imm8Gpr.Imm8 src2)) (x64_rolw_mi src1 src2))
3043+
(rule (x64_rotl $I32 src1 (Imm8Gpr.Gpr src2)) (x64_roll_mc src1 src2))
3044+
(rule (x64_rotl $I32 src1 (Imm8Gpr.Imm8 src2)) (x64_roll_mi src1 src2))
3045+
(rule (x64_rotl $I64 src1 (Imm8Gpr.Gpr src2)) (x64_rolq_mc src1 src2))
3046+
(rule (x64_rotl $I64 src1 (Imm8Gpr.Imm8 src2)) (x64_rolq_mi src1 src2))
3047+
(rule 1 (x64_rotl (ty_32_or_64 ty) src (Imm8Gpr.Imm8 imm))
30893048
(if-let true (use_bmi2))
30903049
(x64_rorx ty src (u8_sub (ty_bits ty) imm)))
30913050

30923051
;; Helper for creating `rotr` instructions.
30933052
(decl x64_rotr (Type Gpr Imm8Gpr) Gpr)
3094-
(rule (x64_rotr ty src1 src2)
3095-
(shift_r ty (ShiftKind.RotateRight) src1 src2))
3096-
(rule 1 (x64_rotr (ty_32_or_64 ty) src (imm8_from_imm8_gpr imm))
3053+
(rule (x64_rotr $I8 src1 (Imm8Gpr.Gpr src2)) (x64_rorb_mc src1 src2))
3054+
(rule (x64_rotr $I8 src1 (Imm8Gpr.Imm8 src2)) (x64_rorb_mi src1 src2))
3055+
(rule (x64_rotr $I16 src1 (Imm8Gpr.Gpr src2)) (x64_rorw_mc src1 src2))
3056+
(rule (x64_rotr $I16 src1 (Imm8Gpr.Imm8 src2)) (x64_rorw_mi src1 src2))
3057+
(rule (x64_rotr $I32 src1 (Imm8Gpr.Gpr src2)) (x64_rorl_mc src1 src2))
3058+
(rule (x64_rotr $I32 src1 (Imm8Gpr.Imm8 src2)) (x64_rorl_mi src1 src2))
3059+
(rule (x64_rotr $I64 src1 (Imm8Gpr.Gpr src2)) (x64_rorq_mc src1 src2))
3060+
(rule (x64_rotr $I64 src1 (Imm8Gpr.Imm8 src2)) (x64_rorq_mi src1 src2))
3061+
(rule 1 (x64_rotr (ty_32_or_64 ty) src (Imm8Gpr.Imm8 imm))
30973062
(if-let true (use_bmi2))
30983063
(x64_rorx ty src imm))
30993064

31003065
;; Helper for creating `shl` instructions.
31013066
(decl x64_shl (Type Gpr Imm8Gpr) Gpr)
3102-
(rule (x64_shl ty src1 src2)
3103-
(shift_r ty (ShiftKind.ShiftLeft) src1 src2))
3067+
(rule (x64_shl $I8 src1 (Imm8Gpr.Gpr src2)) (x64_shlb_mc src1 src2))
3068+
(rule (x64_shl $I8 src1 (Imm8Gpr.Imm8 src2)) (x64_shlb_mi src1 src2))
3069+
(rule (x64_shl $I16 src1 (Imm8Gpr.Gpr src2)) (x64_shlw_mc src1 src2))
3070+
(rule (x64_shl $I16 src1 (Imm8Gpr.Imm8 src2)) (x64_shlw_mi src1 src2))
3071+
(rule (x64_shl $I32 src1 (Imm8Gpr.Gpr src2)) (x64_shll_mc src1 src2))
3072+
(rule (x64_shl $I32 src1 (Imm8Gpr.Imm8 src2)) (x64_shll_mi src1 src2))
3073+
(rule (x64_shl $I64 src1 (Imm8Gpr.Gpr src2)) (x64_shlq_mc src1 src2))
3074+
(rule (x64_shl $I64 src1 (Imm8Gpr.Imm8 src2)) (x64_shlq_mi src1 src2))
31043075
;; With BMI2 the `shlx` instruction is also available, and it's unconditionally
31053076
;; used for registers shifted by registers since it provides more freedom
31063077
;; in regalloc since nothing is constrained. Note that the `shlx` instruction
31073078
;; doesn't encode an immediate so any immediate-based shift still uses `shl`.
3108-
(rule 1 (x64_shl (ty_32_or_64 ty) src1 (gpr_from_imm8_gpr src2))
3079+
(rule 1 (x64_shl (ty_32_or_64 ty) src1 (Imm8Gpr.Gpr src2))
31093080
(if-let true (use_bmi2))
31103081
(x64_shlx ty src1 src2))
31113082

31123083
;; Helper for creating logical shift-right instructions.
31133084
(decl x64_shr (Type Gpr Imm8Gpr) Gpr)
3114-
(rule (x64_shr ty src1 src2)
3115-
(shift_r ty (ShiftKind.ShiftRightLogical) src1 src2))
3085+
(rule (x64_shr $I8 src1 (Imm8Gpr.Gpr src2)) (x64_shrb_mc src1 src2))
3086+
(rule (x64_shr $I8 src1 (Imm8Gpr.Imm8 src2)) (x64_shrb_mi src1 src2))
3087+
(rule (x64_shr $I16 src1 (Imm8Gpr.Gpr src2)) (x64_shrw_mc src1 src2))
3088+
(rule (x64_shr $I16 src1 (Imm8Gpr.Imm8 src2)) (x64_shrw_mi src1 src2))
3089+
(rule (x64_shr $I32 src1 (Imm8Gpr.Gpr src2)) (x64_shrl_mc src1 src2))
3090+
(rule (x64_shr $I32 src1 (Imm8Gpr.Imm8 src2)) (x64_shrl_mi src1 src2))
3091+
(rule (x64_shr $I64 src1 (Imm8Gpr.Gpr src2)) (x64_shrq_mc src1 src2))
3092+
(rule (x64_shr $I64 src1 (Imm8Gpr.Imm8 src2)) (x64_shrq_mi src1 src2))
31163093
;; see `x64_shl` for more info about this rule
3117-
(rule 1 (x64_shr (ty_32_or_64 ty) src1 (gpr_from_imm8_gpr src2))
3094+
(rule 1 (x64_shr (ty_32_or_64 ty) src1 (Imm8Gpr.Gpr src2))
31183095
(if-let true (use_bmi2))
31193096
(x64_shrx ty src1 src2))
31203097

31213098
;; Helper for creating arithmetic shift-right instructions.
31223099
(decl x64_sar (Type Gpr Imm8Gpr) Gpr)
3123-
(rule (x64_sar ty src1 src2)
3124-
(shift_r ty (ShiftKind.ShiftRightArithmetic) src1 src2))
3100+
(rule (x64_sar $I8 src1 (Imm8Gpr.Gpr src2)) (x64_sarb_mc src1 src2))
3101+
(rule (x64_sar $I8 src1 (Imm8Gpr.Imm8 src2)) (x64_sarb_mi src1 src2))
3102+
(rule (x64_sar $I16 src1 (Imm8Gpr.Gpr src2)) (x64_sarw_mc src1 src2))
3103+
(rule (x64_sar $I16 src1 (Imm8Gpr.Imm8 src2)) (x64_sarw_mi src1 src2))
3104+
(rule (x64_sar $I32 src1 (Imm8Gpr.Gpr src2)) (x64_sarl_mc src1 src2))
3105+
(rule (x64_sar $I32 src1 (Imm8Gpr.Imm8 src2)) (x64_sarl_mi src1 src2))
3106+
(rule (x64_sar $I64 src1 (Imm8Gpr.Gpr src2)) (x64_sarq_mc src1 src2))
3107+
(rule (x64_sar $I64 src1 (Imm8Gpr.Imm8 src2)) (x64_sarq_mi src1 src2))
31253108
;; see `x64_shl` for more info about this rule
3126-
(rule 1 (x64_sar (ty_32_or_64 ty) src1 (gpr_from_imm8_gpr src2))
3109+
(rule 1 (x64_sar (ty_32_or_64 ty) src1 (Imm8Gpr.Gpr src2))
31273110
(if-let true (use_bmi2))
31283111
(x64_sarx ty src1 src2))
31293112

@@ -5656,7 +5639,6 @@
56565639
(convert XmmMemImm XmmMemAlignedImm xmm_mem_imm_to_xmm_mem_aligned_imm)
56575640

56585641
(convert Gpr Imm8Gpr gpr_to_imm8_gpr)
5659-
(convert Imm8Reg Imm8Gpr imm8_reg_to_imm8_gpr)
56605642

56615643
(convert Amode SyntheticAmode amode_to_synthetic_amode)
56625644
(convert Amode GprMem amode_to_gpr_mem)

0 commit comments

Comments
 (0)