Skip to content

Commit 00b7f8d

Browse files
authored
pulley: Implement the wide-arithmetic proposal (bytecodealliance#9944)
* pulley: Implement the wide-arithmetic proposal

  Add a few minor instructions/lowerings for the new operations added as part of the wide-arithmetic proposal. These are all part of the "extended" opcode set since they shouldn't be common, and if they're performance-critical you probably want a native backend instead.

* Review comments
1 parent 43ebcb8 commit 00b7f8d

File tree

5 files changed

+157
-17
lines changed

5 files changed

+157
-17
lines changed

cranelift/codegen/meta/src/pulley.rs

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ impl Inst<'_> {
6868
let src2 = parts.next().unwrap_or(dst);
6969
Operand::Binop { dst, src1, src2 }
7070
}
71-
("dst", ty) => Operand::Writable { name, ty },
71+
(name, ty) if name.starts_with("dst") => Operand::Writable { name, ty },
7272
(name, "RegSet < XReg >") => Operand::Normal {
7373
name,
7474
ty: "XRegSet",
@@ -137,7 +137,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
137137
format_string.push_str(name);
138138
format_string.push_str("}");
139139
if ty.contains("Reg") {
140-
if name == "dst" {
140+
if matches!(op, Operand::Writable { .. }) {
141141
locals.push_str(&format!("let {name} = reg_name(*{name}.to_reg());\n"));
142142
} else {
143143
locals.push_str(&format!("let {name} = reg_name(**{name});\n"));
@@ -342,7 +342,7 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
342342
let mut rule = String::new();
343343
isle.push_str(&format!("(decl pulley_{snake_name} ("));
344344
rule.push_str(&format!("(rule (pulley_{snake_name} "));
345-
let mut result = None;
345+
let mut results = Vec::new();
346346
let mut ops = Vec::new();
347347
for op in inst.operands() {
348348
match op {
@@ -352,16 +352,14 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
352352
ops.push(name);
353353
}
354354
Operand::Writable { name: _, ty } => {
355-
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
356-
result = Some(ty);
355+
results.push(ty);
357356
}
358357
Operand::Binop { dst, src1, src2 } => {
359358
isle.push_str(&format!("{src1} {src2}"));
360359
rule.push_str("src1 src2");
361360
ops.push("src1");
362361
ops.push("src2");
363-
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
364-
result = Some(dst);
362+
results.push(dst);
365363
}
366364
}
367365
isle.push_str(" ");
@@ -370,8 +368,8 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
370368
isle.push_str(") ");
371369
rule.push_str(")");
372370
let ops = ops.join(" ");
373-
match result {
374-
Some(result) => {
371+
match &results[..] {
372+
[result] => {
375373
isle.push_str(result);
376374
rule.push_str(&format!(
377375
"
@@ -384,12 +382,28 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
384382
result.to_lowercase()
385383
));
386384
}
387-
None => {
385+
[a, b] => {
386+
isle.push_str("ValueRegs");
387+
rule.push_str(&format!(
388+
"
389+
(let (
390+
(dst1 Writable{a} (temp_writable_{}))
391+
(dst2 Writable{b} (temp_writable_{}))
392+
(_ Unit (emit (RawInst.{name} dst1 dst2 {ops})))
393+
)
394+
(value_regs dst1 dst2)))\
395+
\n",
396+
a.to_lowercase(),
397+
b.to_lowercase(),
398+
));
399+
}
400+
[] => {
388401
isle.push_str("SideEffectNoResult");
389402
rule.push_str(&format!(
390403
" (SideEffectNoResult.Inst (RawInst.{name} {ops})))\n",
391404
));
392405
}
406+
other => panic!("cannot codegen results {other:?}"),
393407
}
394408
isle.push_str(")\n");
395409

cranelift/codegen/src/isa/pulley_shared/lower.isle

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,17 @@
242242
(if-let neg_u32 (u32_try_from_u64 neg_u64))
243243
neg_u32)
244244

245+
;; 128-bit addition
246+
;; Lowers an `iadd` whose result type is `$I128` to one `pulley_xadd128`.
;; Each operand is bound as `ValueRegs`; registers 0 and 1 of `a`, then of `b`,
;; are passed in the order `xadd128` declares its sources
;; (`lhs_lo`, `lhs_hi`, `rhs_lo`, `rhs_hi`).
(rule 1 (lower (has_type $I128 (iadd a b)))
247+
(let ((a ValueRegs a)
248+
(b ValueRegs b))
249+
(pulley_xadd128
250+
(value_regs_get a 0)
251+
(value_regs_get a 1)
252+
(value_regs_get b 0)
253+
(value_regs_get b 1))))
254+
255+
;; vector addition
245256
(rule 1 (lower (has_type $I8X16 (iadd a b))) (pulley_vaddi8x16 a b))
246257
(rule 1 (lower (has_type $I16X8 (iadd a b))) (pulley_vaddi16x8 a b))
247258
(rule 1 (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b))
@@ -287,6 +298,17 @@
287298
(if-let c (u8_from_negated_iconst b))
288299
(pulley_xadd64_u8 a c))
289300

301+
;; 128-bit subtraction
302+
;; Lowers an `isub` whose result type is `$I128` to one `pulley_xsub128`.
;; Each operand is bound as `ValueRegs`; registers 0 and 1 of `a`, then of `b`,
;; are passed in the order `xsub128` declares its sources
;; (`lhs_lo`, `lhs_hi`, `rhs_lo`, `rhs_hi`).
(rule 1 (lower (has_type $I128 (isub a b)))
303+
(let ((a ValueRegs a)
304+
(b ValueRegs b))
305+
(pulley_xsub128
306+
(value_regs_get a 0)
307+
(value_regs_get a 1)
308+
(value_regs_get b 0)
309+
(value_regs_get b 1))))
310+
311+
;; vector subtraction
290312
(rule 1 (lower (has_type $I8X16 (isub a b))) (pulley_vsubi8x16 a b))
291313
(rule 1 (lower (has_type $I16X8 (isub a b))) (pulley_vsubi16x8 a b))
292314
(rule 1 (lower (has_type $I32X4 (isub a b))) (pulley_vsubi32x4 a b))
@@ -313,6 +335,13 @@
313335
(rule 4 (lower (has_type $I64 (imul a (i8_from_iconst b))))
314336
(pulley_xmul64_s8 a b))
315337

338+
;; 128-bit (or wide) multiplication
339+
;; Matches a `$I128` `imul` only when both operands are `uextend`s, and lowers
;; it to `pulley_xwidemul64_u` on the pre-extension values passed through
;; `zext64`.
;; NOTE(review): `zext64` presumably zero-extends a narrower value to 64 bits;
;; confirm against its definition.
(rule (lower (has_type $I128 (imul (uextend a) (uextend b))))
340+
(pulley_xwidemul64_u (zext64 a) (zext64 b)))
341+
;; Matches a `$I128` `imul` only when both operands are `sextend`s, and lowers
;; it to `pulley_xwidemul64_s` on the pre-extension values passed through
;; `sext64`.
;; NOTE(review): `sext64` presumably sign-extends a narrower value to 64 bits;
;; confirm against its definition.
(rule (lower (has_type $I128 (imul (sextend a) (sextend b))))
342+
(pulley_xwidemul64_s (sext64 a) (sext64 b)))
343+
344+
;; vector multiplication
316345
(rule (lower (has_type $I8X16 (imul a b))) (pulley_vmuli8x16 a b))
317346
(rule (lower (has_type $I16X8 (imul a b))) (pulley_vmuli16x8 a b))
318347
(rule (lower (has_type $I32X4 (imul a b))) (pulley_vmuli32x4 a b))

crates/wast-util/src/lib.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -304,13 +304,6 @@ impl Compiler {
304304
if config.threads() {
305305
return true;
306306
}
307-
// Unsupported proposals. Note that other proposals have partial
308-
// support at this time (pulley is a work-in-progress) and so
309-
// individual tests are listed below as "should fail" even if
310-
// they're not covered in this list.
311-
if config.wide_arithmetic() {
312-
return true;
313-
}
314307
}
315308
}
316309

pulley/src/interp.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,17 @@ impl Interpreter<'_> {
996996
}
997997
ControlFlow::Continue(())
998998
}
999+
1000+
/// Assembles a 128-bit signed integer from a pair of `x` registers.
///
/// `lo` supplies bits 0..64 and is read as a `u64`, so it is zero-extended.
/// `hi` supplies bits 64..128 and is read as an `i64`, so its sign becomes
/// the sign of the combined value.
fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
    let low_bits = i128::from(self.state[lo].get_u64());
    let high_bits = i128::from(self.state[hi].get_i64()) << 64;
    high_bits | low_bits
}
1005+
1006+
/// Splits `val` into two 64-bit halves and stores them in a register pair.
///
/// The low 64 bits are written to `lo` first, then the high 64 bits are
/// written to `hi`.
fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
    // Truncating casts are intentional: each one keeps exactly one half.
    let low_half = val as u64;
    let high_half = (val >> 64) as u64;
    self.state[lo].set_u64(low_half);
    self.state[hi].set_u64(high_half);
}
9991010
}
10001011

10011012
#[test]
@@ -4811,4 +4822,64 @@ impl ExtendedOpVisitor for Interpreter<'_> {
48114822
self.state[dst].set_f64x2(a);
48124823
ControlFlow::Continue(())
48134824
}
4825+
4826+
fn xadd128(
4827+
&mut self,
4828+
dst_lo: XReg,
4829+
dst_hi: XReg,
4830+
lhs_lo: XReg,
4831+
lhs_hi: XReg,
4832+
rhs_lo: XReg,
4833+
rhs_hi: XReg,
4834+
) -> ControlFlow<Done> {
4835+
let lhs = self.get_i128(lhs_lo, lhs_hi);
4836+
let rhs = self.get_i128(rhs_lo, rhs_hi);
4837+
let result = lhs.wrapping_add(rhs);
4838+
self.set_i128(dst_lo, dst_hi, result);
4839+
ControlFlow::Continue(())
4840+
}
4841+
4842+
fn xsub128(
4843+
&mut self,
4844+
dst_lo: XReg,
4845+
dst_hi: XReg,
4846+
lhs_lo: XReg,
4847+
lhs_hi: XReg,
4848+
rhs_lo: XReg,
4849+
rhs_hi: XReg,
4850+
) -> ControlFlow<Done> {
4851+
let lhs = self.get_i128(lhs_lo, lhs_hi);
4852+
let rhs = self.get_i128(rhs_lo, rhs_hi);
4853+
let result = lhs.wrapping_sub(rhs);
4854+
self.set_i128(dst_lo, dst_hi, result);
4855+
ControlFlow::Continue(())
4856+
}
4857+
4858+
fn xwidemul64_s(
4859+
&mut self,
4860+
dst_lo: XReg,
4861+
dst_hi: XReg,
4862+
lhs: XReg,
4863+
rhs: XReg,
4864+
) -> ControlFlow<Done> {
4865+
let lhs = self.state[lhs].get_i64();
4866+
let rhs = self.state[rhs].get_i64();
4867+
let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
4868+
self.set_i128(dst_lo, dst_hi, result);
4869+
ControlFlow::Continue(())
4870+
}
4871+
4872+
fn xwidemul64_u(
4873+
&mut self,
4874+
dst_lo: XReg,
4875+
dst_hi: XReg,
4876+
lhs: XReg,
4877+
rhs: XReg,
4878+
) -> ControlFlow<Done> {
4879+
let lhs = self.state[lhs].get_u64();
4880+
let rhs = self.state[rhs].get_u64();
4881+
let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
4882+
self.set_i128(dst_lo, dst_hi, result as i128);
4883+
ControlFlow::Continue(())
4884+
}
48144885
}

pulley/src/lib.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,6 +1284,39 @@ macro_rules! for_each_extended_op {
12841284
vfma32x4 = Vfma32x4 { dst: VReg, a: VReg, b: VReg, c: VReg };
12851285
/// `dst = ieee_fma(a, b, c)`
12861286
vfma64x2 = Vfma64x2 { dst: VReg, a: VReg, b: VReg, c: VReg };
1287+
1288+
/// `dst_hi:dst_lo = lhs_hi:lhs_lo + rhs_hi:rhs_lo`
1289+
xadd128 = Xadd128 {
1290+
dst_lo: XReg,
1291+
dst_hi: XReg,
1292+
lhs_lo: XReg,
1293+
lhs_hi: XReg,
1294+
rhs_lo: XReg,
1295+
rhs_hi: XReg
1296+
};
1297+
/// `dst_hi:dst_lo = lhs_hi:lhs_lo - rhs_hi:rhs_lo`
1298+
xsub128 = Xsub128 {
1299+
dst_lo: XReg,
1300+
dst_hi: XReg,
1301+
lhs_lo: XReg,
1302+
lhs_hi: XReg,
1303+
rhs_lo: XReg,
1304+
rhs_hi: XReg
1305+
};
1306+
/// `dst_hi:dst_lo = sext(lhs) * sext(rhs)`
1307+
xwidemul64_s = Xwidemul64S {
1308+
dst_lo: XReg,
1309+
dst_hi: XReg,
1310+
lhs: XReg,
1311+
rhs: XReg
1312+
};
1313+
/// `dst_hi:dst_lo = zext(lhs) * zext(rhs)`
1314+
xwidemul64_u = Xwidemul64U {
1315+
dst_lo: XReg,
1316+
dst_hi: XReg,
1317+
lhs: XReg,
1318+
rhs: XReg
1319+
};
12871320
}
12881321
};
12891322
}

0 commit comments

Comments
 (0)