
Commit 2633024

Don't monomorphize the simd helpers for each closure

This halves the total number of LLVM IR lines for SIMD-related functions, from 18227 to 9604.

1 parent b60eced
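The change itself is mechanical: the lane-wise helpers take the per-lane callback as a `&dyn Fn` trait object instead of a generic `impl Fn` parameter, so each helper body is compiled once rather than once per closure type. The standalone sketch below illustrates the general pattern only; the function names and closures are invented for illustration and are not code from this repository.

// Minimal sketch of the pattern this commit applies (illustrative names only).
//
// A generic `impl Fn` parameter makes the compiler emit one copy of the helper
// for every distinct closure type it is called with:
fn for_each_lane_generic(lanes: &mut [i32], f: impl Fn(i32) -> i32) {
    for lane in lanes {
        *lane = f(*lane);
    }
}

// Taking a `&dyn Fn` trait object instead compiles the helper exactly once;
// each call site only pays for an indirect call per lane:
fn for_each_lane_dyn(lanes: &mut [i32], f: &dyn Fn(i32) -> i32) {
    for lane in lanes {
        *lane = f(*lane);
    }
}

fn main() {
    let mut v = [1, 2, 3, 4];
    // Two different closures -> two instantiations of `for_each_lane_generic`...
    for_each_lane_generic(&mut v, |x| x + 1);
    for_each_lane_generic(&mut v, |x| x * 2);
    // ...but still only one compiled copy of `for_each_lane_dyn`.
    for_each_lane_dyn(&mut v, &|x| x - 1);
    for_each_lane_dyn(&mut v, &|x| x << 1);
    println!("{:?}", v);
}

Callers change accordingly: every closure argument gains a leading `&` (for example `simd_for_each_lane(fx, a, ret, &|fx, ...| { ... })`), as the diffs below show.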

3 files changed: +29 -29 lines

src/intrinsics/llvm.rs
Lines changed: 3 additions & 3 deletions

@@ -73,7 +73,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
                 kind => unreachable!("kind {:?}", kind),
             };
 
-            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
                 let res_lane = match lane_layout.ty.kind() {
                     ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
                     _ => unreachable!("{:?}", lane_layout.ty),
@@ -83,7 +83,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
         };
         "llvm.x86.sse2.psrli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
                 match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),
@@ -92,7 +92,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
         };
         "llvm.x86.sse2.pslli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
                 match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),

src/intrinsics/mod.rs
Lines changed: 4 additions & 4 deletions

@@ -108,7 +108,7 @@ fn simd_for_each_lane<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: impl Fn(
+    f: &dyn Fn(
         &mut FunctionCx<'_, '_, 'tcx>,
         TyAndLayout<'tcx>,
         TyAndLayout<'tcx>,
@@ -138,7 +138,7 @@ fn simd_pair_for_each_lane<'tcx>(
     x: CValue<'tcx>,
     y: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: impl Fn(
+    f: &dyn Fn(
         &mut FunctionCx<'_, '_, 'tcx>,
         TyAndLayout<'tcx>,
         TyAndLayout<'tcx>,
@@ -171,7 +171,7 @@ fn simd_reduce<'tcx>(
     val: CValue<'tcx>,
     acc: Option<Value>,
     ret: CPlace<'tcx>,
-    f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
 ) {
     let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     let lane_layout = fx.layout_of(lane_ty);
@@ -192,7 +192,7 @@ fn simd_reduce_bool<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value,
 ) {
     let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     assert!(ret.layout().ty.is_bool());

src/intrinsics/simd.rs
Lines changed: 22 additions & 22 deletions

@@ -22,7 +22,7 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident)
         $x,
         $y,
         $ret,
-        |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
             let res_lane = match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
@@ -45,7 +45,7 @@ macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $r
         $x,
         $y,
         $ret,
-        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
             match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
@@ -62,7 +62,7 @@ macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident,
         $x,
         $y,
         $ret,
-        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
             match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
@@ -80,7 +80,7 @@ macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
         $x,
         $y,
         $ret,
-        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
             match lane_layout.ty.kind() {
                 ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
                 _ => unreachable!("{:?}", lane_layout.ty),
@@ -105,7 +105,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_cast, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, ret_lane_layout, lane| {
                 let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
 
                 let from_signed = type_sign(lane_layout.ty);
@@ -277,7 +277,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_neg, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
                 match lane_layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().ineg(lane),
                     ty::Float(_) => fx.bcx.ins().fneg(lane),
@@ -288,14 +288,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_fabs, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().fabs(lane)
             });
         };
 
         simd_fsqrt, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().sqrt(lane)
             });
         };
@@ -318,7 +318,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_rem, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
                 match lane_layout.ty.kind() {
                     ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
                     ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
@@ -393,7 +393,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_round, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
                 match lane_layout.ty.kind() {
                     ty::Float(FloatTy::F32) => fx.lib_call(
                         "roundf",
@@ -413,26 +413,26 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_ceil, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().ceil(lane)
             });
         };
         simd_floor, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().floor(lane)
            });
         };
         simd_trunc, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().trunc(lane)
             });
         };
 
         simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fadd(a, b)
                 } else {
@@ -443,7 +443,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fmul(a, b)
                 } else {
@@ -454,32 +454,32 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_all, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
+            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b));
         };
 
         simd_reduce_any, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
+            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b));
         };
 
         simd_reduce_and, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().band(a, b));
         };
 
         simd_reduce_or, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bor(a, b));
         };
 
         simd_reduce_xor, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
         };
 
         simd_reduce_min, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+            simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
                 let lt = match layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
@@ -492,7 +492,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_max, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+            simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
                 let gt = match layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
