Translate SIMD construction as insertelements and a single store.

huonw · huonw · commit 071c41104573 · 2014-11-05T00:02:43.000+11:00
This almost completely avoids GEPi's and pointer manipulation,
postponing it until the end with one big write of the whole vector. This
leads to a small speed-up in compilation, and makes it easier for LLVM
to work with the values, e.g. with `--opt-level=0`,

    pub fn foo() -&gt; f32x4 {
        f32x4(0.,0.,0.,0.)
    }

was previously compiled to

    define &lt;4 x float&gt; @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      %sret_slot = alloca &lt;4 x float&gt;
      %0 = getelementptr inbounds &lt;4 x float&gt;* %sret_slot, i32 0, i32 0
      store float 0.000000e+00, float* %0
      %1 = getelementptr inbounds &lt;4 x float&gt;* %sret_slot, i32 0, i32 1
      store float 0.000000e+00, float* %1
      %2 = getelementptr inbounds &lt;4 x float&gt;* %sret_slot, i32 0, i32 2
      store float 0.000000e+00, float* %2
      %3 = getelementptr inbounds &lt;4 x float&gt;* %sret_slot, i32 0, i32 3
      store float 0.000000e+00, float* %3
      %4 = load &lt;4 x float&gt;* %sret_slot
      ret &lt;4 x float&gt; %4
    }

but now becomes

    define &lt;4 x float&gt; @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      ret &lt;4 x float&gt; zeroinitializer
    }
diff --git a/src/librustc/middle/trans/expr.rs b/src/librustc/middle/trans/expr.rs
@@ -1455,14 +1455,35 @@ pub fn trans_adt<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
         None => {}
     };
 
-    // Now, we just overwrite the fields we've explicitly specified
-    for &(i, ref e) in fields.iter() {
-        let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
-        let e_ty = expr_ty_adjusted(bcx, &**e);
-        bcx = trans_into(bcx, &**e, SaveIn(dest));
-        let scope = cleanup::CustomScope(custom_cleanup_scope);
-        fcx.schedule_lifetime_end(scope, dest);
-        fcx.schedule_drop_mem(scope, dest, e_ty);
+    if ty::type_is_simd(bcx.tcx(), ty) {
+        // This is the constructor of a SIMD type, such types are
+        // always primitive machine types and so do not have a
+        // destructor or require any clean-up.
+        let llty = type_of::type_of(bcx.ccx(), ty);
+
+        // keep a vector as a register, and running through the field
+        // `insertelement`ing them directly into that register
+        // (i.e. avoid GEPi and `store`s to an alloca) .
+        let mut vec_val = C_undef(llty);
+
+        for &(i, ref e) in fields.iter() {
+            let block_datum = trans(bcx, &**e);
+            bcx = block_datum.bcx;
+            let position = C_uint(bcx.ccx(), i);
+            let value = block_datum.datum.to_llscalarish(bcx);
+            vec_val = InsertElement(bcx, vec_val, value, position);
+        }
+        Store(bcx, vec_val, addr);
+    } else {
+        // Now, we just overwrite the fields we've explicitly specified
+        for &(i, ref e) in fields.iter() {
+            let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
+            let e_ty = expr_ty_adjusted(bcx, &**e);
+            bcx = trans_into(bcx, &**e, SaveIn(dest));
+            let scope = cleanup::CustomScope(custom_cleanup_scope);
+            fcx.schedule_lifetime_end(scope, dest);
+            fcx.schedule_drop_mem(scope, dest, e_ty);
+        }
     }
 
     adt::trans_set_discr(bcx, &*repr, addr, discr);