Skip to content

Commit 1e4c470

Browse files
authored
pulley: Add immediate payloads to more opcodes (bytecodealliance#9861)
* pulley: Add immediate payloads to more opcodes

This commit adds immediate payloads to the following instructions:

* `xmul32` - `xmul32_s8` / `xmul32_s32`
* `xmul64` - `xmul64_s8` / `xmul64_s32`
* `xband32` - `xband32_s8` / `xband32_s32`
* `xband64` - `xband64_s8` / `xband64_s32`
* `xbor32` - `xbor32_s8` / `xbor32_s32`
* `xbor64` - `xbor64_s8` / `xbor64_s32`
* `xbxor32` - `xbxor32_s8` / `xbxor32_s32`
* `xbxor64` - `xbxor64_s8` / `xbxor64_s32`
* `xshl32` - `xshl32_u6`
* `xshl64` - `xshl64_u6`
* `xshr32_u` - `xshr32_u_u6`
* `xshr64_u` - `xshr64_u_u6`
* `xshr32_s` - `xshr32_s_u6`
* `xshr64_s` - `xshr64_s_u6`

For shifts there's no need to have 32-bit immediates (or even 8-bit) since 6 bits is enough to encode all the immediates. This means that the 6-bit immediate is packed within `BinaryOperands` as a new `U6` type.

This commit unfortunately does not shrink `spidermonkey.cwasm` significantly beyond the prior 29M. This is nevertheless expected to be relatively important for performance.

* Fix test expectations
1 parent 7a05ab0 commit 1e4c470

File tree

19 files changed

+1289
-21
lines changed

19 files changed

+1289
-21
lines changed

cranelift/codegen/meta/src/pulley.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,14 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
137137
pat.push_str(",");
138138
format_string.push_str(&format!(" // trap={{{name}:?}}"));
139139
}
140-
Operand::Binop { .. } => {
140+
Operand::Binop { src2, .. } => {
141141
pat.push_str("dst, src1, src2,");
142142
format_string.push_str(" {dst}, {src1}, {src2}");
143143
locals.push_str(&format!("let dst = reg_name(*dst.to_reg());\n"));
144144
locals.push_str(&format!("let src1 = reg_name(**src1);\n"));
145-
locals.push_str(&format!("let src2 = reg_name(**src2);\n"));
145+
if src2.contains("Reg") {
146+
locals.push_str(&format!("let src2 = reg_name(**src2);\n"));
147+
}
146148
}
147149
}
148150
}
@@ -189,11 +191,14 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
189191
}
190192
}
191193
Operand::TrapCode { .. } => {}
192-
Operand::Binop { .. } => {
193-
pat.push_str("dst, src1, src2,");
194+
Operand::Binop { src2, .. } => {
195+
pat.push_str("dst, src1,");
194196
uses.push("src1");
195-
uses.push("src2");
196197
defs.push("dst");
198+
if src2.contains("Reg") {
199+
pat.push_str("src2,");
200+
uses.push("src2");
201+
}
197202
}
198203
}
199204
}

cranelift/codegen/src/isa/pulley_shared/inst.isle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
(rule (raw_inst_to_inst inst) (MInst.Raw inst))
127127
(convert RawInst MInst raw_inst_to_inst)
128128

129+
(type U6 (primitive U6))
129130
(type BoxCallInfo (primitive BoxCallInfo))
130131
(type BoxCallIndInfo (primitive BoxCallIndInfo))
131132
(type BoxReturnCallInfo (primitive BoxReturnCallInfo))

cranelift/codegen/src/isa/pulley_shared/lower.isle

Lines changed: 80 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,15 @@
223223
(rule (lower (has_type $I32 (imul a b))) (pulley_xmul32 a b))
224224
(rule (lower (has_type $I64 (imul a b))) (pulley_xmul64 a b))
225225

226+
(rule 1 (lower (has_type (ty_int (fits_in_32 _)) (imul a (i32_from_iconst b))))
227+
(pulley_xmul32_s32 a b))
228+
(rule 2 (lower (has_type $I64 (imul a (i32_from_iconst b))))
229+
(pulley_xmul64_s32 a b))
230+
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (imul a (i8_from_iconst b))))
231+
(pulley_xmul32_s8 a b))
232+
(rule 4 (lower (has_type $I64 (imul a (i8_from_iconst b))))
233+
(pulley_xmul64_s8 a b))
234+
226235
(rule (lower (has_type $I8X16 (imul a b))) (pulley_vmuli8x16 a b))
227236
(rule (lower (has_type $I16X8 (imul a b))) (pulley_vmuli16x8 a b))
228237
(rule (lower (has_type $I32X4 (imul a b))) (pulley_vmuli32x4 a b))
@@ -294,11 +303,31 @@
294303
(rule (lower (has_type $I64 (ishl a b)))
295304
(pulley_xshl64 a b))
296305

306+
;; Special-case constant shift amounts.
307+
(rule 1 (lower (has_type $I32 (ishl a b)))
308+
(if-let n (u6_shift_from_iconst b))
309+
(pulley_xshl32_u6 a n))
310+
(rule 1 (lower (has_type $I64 (ishl a b)))
311+
(if-let n (u6_shift_from_iconst b))
312+
(pulley_xshl64_u6 a n))
313+
314+
;; vector shifts
315+
297316
(rule (lower (has_type $I8X16 (ishl a b))) (pulley_vshli8x16 a b))
298317
(rule (lower (has_type $I16X8 (ishl a b))) (pulley_vshli16x8 a b))
299318
(rule (lower (has_type $I32X4 (ishl a b))) (pulley_vshli32x4 a b))
300319
(rule (lower (has_type $I64X2 (ishl a b))) (pulley_vshli64x2 a b))
301320

321+
;; Helper to extract a constant from `Value`, mask it to 6 bits, and then make a
322+
;; `U6`.
323+
(decl pure partial u6_shift_from_iconst (Value) U6)
324+
(rule (u6_shift_from_iconst (u64_from_iconst val))
325+
(if-let (u6_from_u8 x) (u64_as_u8 (u64_and val 0x3f)))
326+
x)
327+
328+
(decl u6_from_u8 (U6) u8)
329+
(extern extractor u6_from_u8 u6_from_u8)
330+
302331
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
303332

304333
(rule (lower (has_type $I8 (ushr a b)))
@@ -313,6 +342,16 @@
313342
(rule (lower (has_type $I64 (ushr a b)))
314343
(pulley_xshr64_u a b))
315344

345+
;; Special-case constant shift amounts.
346+
(rule 1 (lower (has_type $I32 (ushr a b)))
347+
(if-let n (u6_shift_from_iconst b))
348+
(pulley_xshr32_u_u6 a n))
349+
(rule 1 (lower (has_type $I64 (ushr a b)))
350+
(if-let n (u6_shift_from_iconst b))
351+
(pulley_xshr64_u_u6 a n))
352+
353+
;; vector shifts
354+
316355
(rule (lower (has_type $I8X16 (ushr a b))) (pulley_vshri8x16_u a b))
317356
(rule (lower (has_type $I16X8 (ushr a b))) (pulley_vshri16x8_u a b))
318357
(rule (lower (has_type $I32X4 (ushr a b))) (pulley_vshri32x4_u a b))
@@ -332,40 +371,68 @@
332371
(rule (lower (has_type $I64 (sshr a b)))
333372
(pulley_xshr64_s a b))
334373

374+
;; Special-case constant shift amounts.
375+
(rule 1 (lower (has_type $I32 (sshr a b)))
376+
(if-let n (u6_shift_from_iconst b))
377+
(pulley_xshr32_s_u6 a n))
378+
(rule 1 (lower (has_type $I64 (sshr a b)))
379+
(if-let n (u6_shift_from_iconst b))
380+
(pulley_xshr64_s_u6 a n))
381+
382+
;; vector shifts
383+
335384
(rule (lower (has_type $I8X16 (sshr a b))) (pulley_vshri8x16_s a b))
336385
(rule (lower (has_type $I16X8 (sshr a b))) (pulley_vshri16x8_s a b))
337386
(rule (lower (has_type $I32X4 (sshr a b))) (pulley_vshri32x4_s a b))
338387
(rule (lower (has_type $I64X2 (sshr a b))) (pulley_vshri64x2_s a b))
339388

340389
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
341390

342-
(rule 0 (lower (has_type (fits_in_32 _) (band a b)))
343-
(pulley_xband32 a b))
391+
(rule 0 (lower (has_type (fits_in_32 _) (band a b))) (pulley_xband32 a b))
392+
(rule 1 (lower (has_type $I64 (band a b))) (pulley_xband64 a b))
344393

345-
(rule 1 (lower (has_type $I64 (band a b)))
346-
(pulley_xband64 a b))
394+
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (band a (i32_from_iconst b))))
395+
(pulley_xband32_s32 a b))
396+
(rule 4 (lower (has_type $I64 (band a (i32_from_iconst b))))
397+
(pulley_xband64_s32 a b))
398+
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (band a (i8_from_iconst b))))
399+
(pulley_xband32_s8 a b))
400+
(rule 6 (lower (has_type $I64 (band a (i8_from_iconst b))))
401+
(pulley_xband64_s8 a b))
347402

348403
(rule 2 (lower (has_type (ty_vec128 _) (band a b)))
349404
(pulley_vband128 a b))
350405

351406
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
352407

353-
(rule 0 (lower (has_type (fits_in_32 _) (bor a b)))
354-
(pulley_xbor32 a b))
408+
(rule 0 (lower (has_type (fits_in_32 _) (bor a b))) (pulley_xbor32 a b))
409+
(rule 1 (lower (has_type $I64 (bor a b))) (pulley_xbor64 a b))
355410

356-
(rule 1 (lower (has_type $I64 (bor a b)))
357-
(pulley_xbor64 a b))
411+
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (bor a (i32_from_iconst b))))
412+
(pulley_xbor32_s32 a b))
413+
(rule 4 (lower (has_type $I64 (bor a (i32_from_iconst b))))
414+
(pulley_xbor64_s32 a b))
415+
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (bor a (i8_from_iconst b))))
416+
(pulley_xbor32_s8 a b))
417+
(rule 6 (lower (has_type $I64 (bor a (i8_from_iconst b))))
418+
(pulley_xbor64_s8 a b))
358419

359420
(rule 2 (lower (has_type (ty_vec128 _) (bor a b)))
360421
(pulley_vbor128 a b))
361422

362423
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
363424

364-
(rule 0 (lower (has_type (fits_in_32 _) (bxor a b)))
365-
(pulley_xbxor32 a b))
366-
367-
(rule 1 (lower (has_type $I64 (bxor a b)))
368-
(pulley_xbxor64 a b))
425+
(rule 0 (lower (has_type (fits_in_32 _) (bxor a b))) (pulley_xbxor32 a b))
426+
(rule 1 (lower (has_type $I64 (bxor a b))) (pulley_xbxor64 a b))
427+
428+
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (bxor a (i32_from_iconst b))))
429+
(pulley_xbxor32_s32 a b))
430+
(rule 4 (lower (has_type $I64 (bxor a (i32_from_iconst b))))
431+
(pulley_xbxor64_s32 a b))
432+
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (bxor a (i8_from_iconst b))))
433+
(pulley_xbxor32_s8 a b))
434+
(rule 6 (lower (has_type $I64 (bxor a (i8_from_iconst b))))
435+
(pulley_xbxor64_s8 a b))
369436

370437
(rule 2 (lower (has_type (ty_vec128 _) (bxor a b)))
371438
(pulley_vbxor128 a b))

cranelift/codegen/src/isa/pulley_shared/lower/isle.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::machinst::{
2121
CallInfo, IsTailCall, MachInst, Reg, VCodeConstant, VCodeConstantData,
2222
};
2323
use alloc::boxed::Box;
24+
use pulley_interpreter::U6;
2425
use regalloc2::PReg;
2526
type Unit = ();
2627
type VecArgPair = Vec<ArgPair>;
@@ -120,6 +121,10 @@ where
120121
fn cond_invert(&mut self, cond: &Cond) -> Cond {
121122
cond.invert()
122123
}
124+
125+
fn u6_from_u8(&mut self, imm: u8) -> Option<U6> {
126+
U6::new(imm)
127+
}
123128
}
124129

125130
/// The main entry point for lowering with ISLE.

cranelift/codegen/src/isle_prelude.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,10 @@ macro_rules! isle_common_prelude_methods {
919919
val.try_into().ok()
920920
}
921921

922+
fn i32_as_i8(&mut self, val: i32) -> Option<i8> {
923+
val.try_into().ok()
924+
}
925+
922926
fn u8_as_i8(&mut self, val: u8) -> i8 {
923927
val as i8
924928
}

cranelift/codegen/src/prelude.isle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@
158158
(decl u32_as_u16 (u16) u32)
159159
(extern extractor u32_as_u16 u32_as_u16)
160160

161+
(decl i32_as_i8 (i8) i32)
162+
(extern extractor i32_as_i8 i32_as_i8)
163+
161164
(decl pure u64_as_i32 (u64) i32)
162165
(extern constructor u64_as_i32 u64_as_i32)
163166

cranelift/codegen/src/prelude_lower.isle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,10 @@
320320
(extractor (u64_from_iconst x)
321321
(def_inst (iconst (u64_from_imm64 x))))
322322

323+
(decl i8_from_iconst (i8) Value)
324+
(extractor (i8_from_iconst x)
325+
(i32_from_iconst (i32_as_i8 x)))
326+
323327
;; Extract a constant `i32` from a value defined by an `iconst`.
324328
;; The value is sign extended to 32 bits.
325329
(spec (i32_from_iconst arg)

0 commit comments

Comments
 (0)