Skip to content

Commit 2633024

Browse files
committed
Don't monomorphize the simd helpers for each closure
This roughly halves the total number of LLVM IR lines generated for SIMD-related functions, from 18227 to 9604.
1 parent b60eced commit 2633024

File tree

3 files changed

+29
-29
lines changed

3 files changed

+29
-29
lines changed

src/intrinsics/llvm.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
7373
kind => unreachable!("kind {:?}", kind),
7474
};
7575

76-
simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
76+
simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
7777
let res_lane = match lane_layout.ty.kind() {
7878
ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
7979
_ => unreachable!("{:?}", lane_layout.ty),
@@ -83,7 +83,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
8383
};
8484
"llvm.x86.sse2.psrli.d", (c a, o imm8) {
8585
let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
86-
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
86+
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
8787
match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
8888
imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
8989
_ => fx.bcx.ins().iconst(types::I32, 0),
@@ -92,7 +92,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
9292
};
9393
"llvm.x86.sse2.pslli.d", (c a, o imm8) {
9494
let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
95-
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
95+
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
9696
match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
9797
imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
9898
_ => fx.bcx.ins().iconst(types::I32, 0),

src/intrinsics/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ fn simd_for_each_lane<'tcx>(
108108
fx: &mut FunctionCx<'_, '_, 'tcx>,
109109
val: CValue<'tcx>,
110110
ret: CPlace<'tcx>,
111-
f: impl Fn(
111+
f: &dyn Fn(
112112
&mut FunctionCx<'_, '_, 'tcx>,
113113
TyAndLayout<'tcx>,
114114
TyAndLayout<'tcx>,
@@ -138,7 +138,7 @@ fn simd_pair_for_each_lane<'tcx>(
138138
x: CValue<'tcx>,
139139
y: CValue<'tcx>,
140140
ret: CPlace<'tcx>,
141-
f: impl Fn(
141+
f: &dyn Fn(
142142
&mut FunctionCx<'_, '_, 'tcx>,
143143
TyAndLayout<'tcx>,
144144
TyAndLayout<'tcx>,
@@ -171,7 +171,7 @@ fn simd_reduce<'tcx>(
171171
val: CValue<'tcx>,
172172
acc: Option<Value>,
173173
ret: CPlace<'tcx>,
174-
f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
174+
f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
175175
) {
176176
let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
177177
let lane_layout = fx.layout_of(lane_ty);
@@ -192,7 +192,7 @@ fn simd_reduce_bool<'tcx>(
192192
fx: &mut FunctionCx<'_, '_, 'tcx>,
193193
val: CValue<'tcx>,
194194
ret: CPlace<'tcx>,
195-
f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value,
195+
f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value,
196196
) {
197197
let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
198198
assert!(ret.layout().ty.is_bool());

src/intrinsics/simd.rs

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident)
2222
$x,
2323
$y,
2424
$ret,
25-
|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
25+
&|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
2626
let res_lane = match lane_layout.ty.kind() {
2727
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
2828
ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
@@ -45,7 +45,7 @@ macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $r
4545
$x,
4646
$y,
4747
$ret,
48-
|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
48+
&|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
4949
match lane_layout.ty.kind() {
5050
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
5151
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
@@ -62,7 +62,7 @@ macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident,
6262
$x,
6363
$y,
6464
$ret,
65-
|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
65+
&|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
6666
match lane_layout.ty.kind() {
6767
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
6868
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
@@ -80,7 +80,7 @@ macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
8080
$x,
8181
$y,
8282
$ret,
83-
|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
83+
&|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
8484
match lane_layout.ty.kind() {
8585
ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
8686
_ => unreachable!("{:?}", lane_layout.ty),
@@ -105,7 +105,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
105105

106106
simd_cast, (c a) {
107107
validate_simd_type(fx, intrinsic, span, a.layout().ty);
108-
simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
108+
simd_for_each_lane(fx, a, ret, &|fx, lane_layout, ret_lane_layout, lane| {
109109
let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
110110

111111
let from_signed = type_sign(lane_layout.ty);
@@ -277,7 +277,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
277277

278278
simd_neg, (c a) {
279279
validate_simd_type(fx, intrinsic, span, a.layout().ty);
280-
simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
280+
simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
281281
match lane_layout.ty.kind() {
282282
ty::Int(_) => fx.bcx.ins().ineg(lane),
283283
ty::Float(_) => fx.bcx.ins().fneg(lane),
@@ -288,14 +288,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
288288

289289
simd_fabs, (c a) {
290290
validate_simd_type(fx, intrinsic, span, a.layout().ty);
291-
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
291+
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
292292
fx.bcx.ins().fabs(lane)
293293
});
294294
};
295295

296296
simd_fsqrt, (c a) {
297297
validate_simd_type(fx, intrinsic, span, a.layout().ty);
298-
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
298+
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
299299
fx.bcx.ins().sqrt(lane)
300300
});
301301
};
@@ -318,7 +318,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
318318
};
319319
simd_rem, (c x, c y) {
320320
validate_simd_type(fx, intrinsic, span, x.layout().ty);
321-
simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
321+
simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
322322
match lane_layout.ty.kind() {
323323
ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
324324
ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
@@ -393,7 +393,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
393393

394394
simd_round, (c a) {
395395
validate_simd_type(fx, intrinsic, span, a.layout().ty);
396-
simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
396+
simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
397397
match lane_layout.ty.kind() {
398398
ty::Float(FloatTy::F32) => fx.lib_call(
399399
"roundf",
@@ -413,26 +413,26 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
413413
};
414414
simd_ceil, (c a) {
415415
validate_simd_type(fx, intrinsic, span, a.layout().ty);
416-
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
416+
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
417417
fx.bcx.ins().ceil(lane)
418418
});
419419
};
420420
simd_floor, (c a) {
421421
validate_simd_type(fx, intrinsic, span, a.layout().ty);
422-
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
422+
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
423423
fx.bcx.ins().floor(lane)
424424
});
425425
};
426426
simd_trunc, (c a) {
427427
validate_simd_type(fx, intrinsic, span, a.layout().ty);
428-
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
428+
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
429429
fx.bcx.ins().trunc(lane)
430430
});
431431
};
432432

433433
simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
434434
validate_simd_type(fx, intrinsic, span, v.layout().ty);
435-
simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
435+
simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
436436
if lane_layout.ty.is_floating_point() {
437437
fx.bcx.ins().fadd(a, b)
438438
} else {
@@ -443,7 +443,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
443443

444444
simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
445445
validate_simd_type(fx, intrinsic, span, v.layout().ty);
446-
simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
446+
simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
447447
if lane_layout.ty.is_floating_point() {
448448
fx.bcx.ins().fmul(a, b)
449449
} else {
@@ -454,32 +454,32 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
454454

455455
simd_reduce_all, (c v) {
456456
validate_simd_type(fx, intrinsic, span, v.layout().ty);
457-
simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
457+
simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b));
458458
};
459459

460460
simd_reduce_any, (c v) {
461461
validate_simd_type(fx, intrinsic, span, v.layout().ty);
462-
simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
462+
simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b));
463463
};
464464

465465
simd_reduce_and, (c v) {
466466
validate_simd_type(fx, intrinsic, span, v.layout().ty);
467-
simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b));
467+
simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().band(a, b));
468468
};
469469

470470
simd_reduce_or, (c v) {
471471
validate_simd_type(fx, intrinsic, span, v.layout().ty);
472-
simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b));
472+
simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bor(a, b));
473473
};
474474

475475
simd_reduce_xor, (c v) {
476476
validate_simd_type(fx, intrinsic, span, v.layout().ty);
477-
simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
477+
simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
478478
};
479479

480480
simd_reduce_min, (c v) {
481481
validate_simd_type(fx, intrinsic, span, v.layout().ty);
482-
simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
482+
simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
483483
let lt = match layout.ty.kind() {
484484
ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
485485
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
@@ -492,7 +492,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
492492

493493
simd_reduce_max, (c v) {
494494
validate_simd_type(fx, intrinsic, span, v.layout().ty);
495-
simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
495+
simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
496496
let gt = match layout.ty.kind() {
497497
ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
498498
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),

0 commit comments

Comments
 (0)