Skip to content

Commit 071c411

Browse files
committed
Translate SIMD construction as insertelements and a single store.
This almost completely avoids GEPi's and pointer manipulation, postponing the memory access until the end with one big write of the whole vector. This leads to a small speed-up in compilation, and makes it easier for LLVM to work with the values, e.g. with `--opt-level=0`,

    pub fn foo() -> f32x4 {
        f32x4(0.,0.,0.,0.)
    }

was previously compiled to

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      %sret_slot = alloca <4 x float>
      %0 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 0
      store float 0.000000e+00, float* %0
      %1 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 1
      store float 0.000000e+00, float* %1
      %2 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 2
      store float 0.000000e+00, float* %2
      %3 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 3
      store float 0.000000e+00, float* %3
      %4 = load <4 x float>* %sret_slot
      ret <4 x float> %4
    }

but now becomes

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      ret <4 x float> zeroinitializer
    }
1 parent ff50f24 commit 071c411

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

src/librustc/middle/trans/expr.rs

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1455,14 +1455,35 @@ pub fn trans_adt<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
14551455
None => {}
14561456
};
14571457

1458-
// Now, we just overwrite the fields we've explicitly specified
1459-
for &(i, ref e) in fields.iter() {
1460-
let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
1461-
let e_ty = expr_ty_adjusted(bcx, &**e);
1462-
bcx = trans_into(bcx, &**e, SaveIn(dest));
1463-
let scope = cleanup::CustomScope(custom_cleanup_scope);
1464-
fcx.schedule_lifetime_end(scope, dest);
1465-
fcx.schedule_drop_mem(scope, dest, e_ty);
1458+
if ty::type_is_simd(bcx.tcx(), ty) {
1459+
// This is the constructor of a SIMD type, such types are
1460+
// always primitive machine types and so do not have a
1461+
// destructor or require any clean-up.
1462+
let llty = type_of::type_of(bcx.ccx(), ty);
1463+
1464+
// keep the vector in a register, running through the fields and
1465+
// `insertelement`ing them directly into that register
1466+
// (i.e. avoid GEPi and `store`s to an alloca).
1467+
let mut vec_val = C_undef(llty);
1468+
1469+
for &(i, ref e) in fields.iter() {
1470+
let block_datum = trans(bcx, &**e);
1471+
bcx = block_datum.bcx;
1472+
let position = C_uint(bcx.ccx(), i);
1473+
let value = block_datum.datum.to_llscalarish(bcx);
1474+
vec_val = InsertElement(bcx, vec_val, value, position);
1475+
}
1476+
Store(bcx, vec_val, addr);
1477+
} else {
1478+
// Now, we just overwrite the fields we've explicitly specified
1479+
for &(i, ref e) in fields.iter() {
1480+
let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
1481+
let e_ty = expr_ty_adjusted(bcx, &**e);
1482+
bcx = trans_into(bcx, &**e, SaveIn(dest));
1483+
let scope = cleanup::CustomScope(custom_cleanup_scope);
1484+
fcx.schedule_lifetime_end(scope, dest);
1485+
fcx.schedule_drop_mem(scope, dest, e_ty);
1486+
}
14661487
}
14671488

14681489
adt::trans_set_discr(bcx, &*repr, addr, discr);

0 commit comments

Comments
 (0)