Skip to content

Commit 41a90fb

Browse files
committed
x86-sse2 ABI: use SSE registers for floats and SIMD
1 parent 929b055 commit 41a90fb

File tree

8 files changed

+201
-119
lines changed

8 files changed

+201
-119
lines changed

compiler/rustc_target/src/callconv/mod.rs

Lines changed: 75 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::abi::{
99
self, AddressSpace, Align, BackendRepr, HasDataLayout, Pointer, Size, TyAbiInterface,
1010
TyAndLayout,
1111
};
12-
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
12+
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
1313

1414
mod aarch64;
1515
mod amdgpu;
@@ -388,6 +388,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
388388
/// Pass this argument directly instead. Should NOT be used!
389389
/// Only exists because of past ABI mistakes that will take time to fix
390390
/// (see <https://github.com/rust-lang/rust/issues/115666>).
391+
#[track_caller]
391392
pub fn make_direct_deprecated(&mut self) {
392393
match self.mode {
393394
PassMode::Indirect { .. } => {
@@ -400,6 +401,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
400401

401402
/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
402403
/// This is valid for both sized and unsized arguments.
404+
#[track_caller]
403405
pub fn make_indirect(&mut self) {
404406
match self.mode {
405407
PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -414,6 +416,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
414416

415417
/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
416418
/// ZSTs indirectly.
419+
#[track_caller]
417420
pub fn make_indirect_from_ignore(&mut self) {
418421
match self.mode {
419422
PassMode::Ignore => {
@@ -737,26 +740,47 @@ impl<'a, Ty> FnAbi<'a, Ty> {
737740
C: HasDataLayout + HasTargetSpec,
738741
{
739742
let spec = cx.target_spec();
740-
match &spec.arch[..] {
743+
match &*spec.arch {
741744
"x86" => x86::compute_rust_abi_info(cx, self, abi),
742745
"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
743746
"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
744747
"aarch64" => aarch64::compute_rust_abi_info(cx, self),
745748
_ => {}
746749
};
747750

751+
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
752+
// May only return `true` if the target will always pass those arguments the same way,
753+
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
754+
// target features are required to pass a SIMD value in registers must be listed in
755+
// the `abi_required_features` for the current target and ABI.
756+
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
757+
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
758+
// to 128-bit-sized vectors.
759+
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
760+
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
761+
arg.layout.size.bits() <= 128
762+
}
763+
// So far, we haven't implemented this logic for any other target.
764+
_ => false,
765+
};
766+
748767
for (arg_idx, arg) in self
749768
.args
750769
.iter_mut()
751770
.enumerate()
752771
.map(|(idx, arg)| (Some(idx), arg))
753772
.chain(iter::once((None, &mut self.ret)))
754773
{
755-
if arg.is_ignore() {
774+
// If the logic above already picked a specific type to cast the argument to, leave that
775+
// in place.
776+
if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) {
756777
continue;
757778
}
758779

759-
if arg_idx.is_none() && arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2 {
780+
if arg_idx.is_none()
781+
&& arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
782+
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
783+
{
760784
// Return values larger than 2 registers using a return area
761785
// pointer. LLVM and Cranelift disagree about how to return
762786
// values that don't fit in the registers designated for return
@@ -765,7 +789,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
765789
// return value independently and decide to pass it in a
766790
// register or not, which would result in the return value
767791
// being passed partially in registers and partially through a
768-
// return area pointer.
792+
// return area pointer. For large IR-level values such as `i128`,
793+
// cranelift will even split up the value into smaller chunks.
769794
//
770795
// While Cranelift may need to be fixed as the LLVM behavior is
771796
// generally more correct with respect to the surface language,
@@ -795,53 +820,58 @@ impl<'a, Ty> FnAbi<'a, Ty> {
795820
// rustc_target already ensure any return value which doesn't
796821
// fit in the available amount of return registers is passed in
797822
// the right way for the current target.
823+
//
824+
// The adjustment is not necessary nor desired for types with a vector
825+
// representation; those are handled below.
798826
arg.make_indirect();
799827
continue;
800828
}
801829

802830
match arg.layout.backend_repr {
803-
BackendRepr::Memory { .. } => {}
804-
805-
// This is a fun case! The gist of what this is doing is
806-
// that we want callers and callees to always agree on the
807-
// ABI of how they pass SIMD arguments. If we were to *not*
808-
// make these arguments indirect then they'd be immediates
809-
// in LLVM, which means that they'd used whatever the
810-
// appropriate ABI is for the callee and the caller. That
811-
// means, for example, if the caller doesn't have AVX
812-
// enabled but the callee does, then passing an AVX argument
813-
// across this boundary would cause corrupt data to show up.
814-
//
815-
// This problem is fixed by unconditionally passing SIMD
816-
// arguments through memory between callers and callees
817-
// which should get them all to agree on ABI regardless of
818-
// target feature sets. Some more information about this
819-
// issue can be found in #44367.
820-
//
821-
// Note that the intrinsic ABI is exempt here as
822-
// that's how we connect up to LLVM and it's unstable
823-
// anyway, we control all calls to it in libstd.
824-
BackendRepr::Vector { .. }
825-
if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect =>
826-
{
827-
arg.make_indirect();
828-
continue;
831+
BackendRepr::Memory { .. } => {
832+
// Compute `Aggregate` ABI.
833+
834+
let is_indirect_not_on_stack =
835+
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
836+
assert!(is_indirect_not_on_stack);
837+
838+
let size = arg.layout.size;
839+
if arg.layout.is_sized() && size <= Pointer(AddressSpace::DATA).size(cx) {
840+
// We want to pass small aggregates as immediates, but using
841+
// an LLVM aggregate type for this leads to bad optimizations,
842+
// so we pick an appropriately sized integer type instead.
843+
arg.cast_to(Reg { kind: RegKind::Integer, size });
844+
}
829845
}
830846

831-
_ => continue,
832-
}
833-
// Compute `Aggregate` ABI.
834-
835-
let is_indirect_not_on_stack =
836-
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
837-
assert!(is_indirect_not_on_stack);
838-
839-
let size = arg.layout.size;
840-
if !arg.layout.is_unsized() && size <= Pointer(AddressSpace::DATA).size(cx) {
841-
// We want to pass small aggregates as immediates, but using
842-
// an LLVM aggregate type for this leads to bad optimizations,
843-
// so we pick an appropriately sized integer type instead.
844-
arg.cast_to(Reg { kind: RegKind::Integer, size });
847+
BackendRepr::Vector { .. } => {
848+
// This is a fun case! The gist of what this is doing is
849+
// that we want callers and callees to always agree on the
850+
// ABI of how they pass SIMD arguments. If we were to *not*
851+
// make these arguments indirect then they'd be immediates
852+
// in LLVM, which means that they'd used whatever the
853+
// appropriate ABI is for the callee and the caller. That
854+
// means, for example, if the caller doesn't have AVX
855+
// enabled but the callee does, then passing an AVX argument
856+
// across this boundary would cause corrupt data to show up.
857+
//
858+
// This problem is fixed by unconditionally passing SIMD
859+
// arguments through memory between callers and callees
860+
// which should get them all to agree on ABI regardless of
861+
// target feature sets. Some more information about this
862+
// issue can be found in #44367.
863+
//
864+
// Note that the intrinsic ABI is exempt here as those are not
865+
// real functions anyway, and the backend expects very specific types.
866+
if abi != ExternAbi::RustIntrinsic
867+
&& spec.simd_types_indirect
868+
&& !can_pass_simd_directly(arg)
869+
{
870+
arg.make_indirect();
871+
}
872+
}
873+
874+
_ => {}
845875
}
846876
}
847877
}

compiler/rustc_target/src/callconv/x86.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ use crate::abi::call::{ArgAttribute, FnAbi, PassMode, Reg, RegKind};
22
use crate::abi::{
33
AddressSpace, Align, BackendRepr, Float, HasDataLayout, Pointer, TyAbiInterface, TyAndLayout,
44
};
5-
use crate::spec::HasTargetSpec;
65
use crate::spec::abi::Abi as SpecAbi;
6+
use crate::spec::{HasTargetSpec, RustcAbi};
77

88
#[derive(PartialEq)]
99
pub(crate) enum Flavor {
@@ -234,8 +234,16 @@ where
234234
_ => false, // anyway not passed via registers on x86
235235
};
236236
if has_float {
237-
if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
238-
// Same size or smaller than pointer, return in a register.
237+
if cx.target_spec().rustc_abi == Some(RustcAbi::X86Sse2)
238+
&& fn_abi.ret.layout.backend_repr.is_scalar()
239+
&& fn_abi.ret.layout.size.bits() <= 128
240+
{
241+
// This is a single scalar that fits into an SSE register, and the target uses the
242+
// SSE ABI. We prefer this over integer registers as float scalars need to be in SSE
243+
// registers for float operations, so that's the best place to pass them around.
244+
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
245+
} else if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
246+
// Same size or smaller than pointer, return in an integer register.
239247
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
240248
} else {
241249
// Larger than a pointer, return indirectly.

tests/assembly/closure-inherit-target-feature.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 {
1515
// check that _mm_blend_ps is not being inlined into the closure
1616
// CHECK-LABEL: {{sse41_blend_nofeature.*closure.*:}}
1717
// CHECK-NOT: blendps
18-
// CHECK: {{call .*_mm_blend_ps.*}}
18+
// CHECK: {{jmp .*_mm_blend_ps.*}}
1919
// CHECK-NOT: blendps
20-
// CHECK: ret
2120
#[inline(never)]
2221
|x, y| _mm_blend_ps(x, y, 0b0101)
2322
};

0 commit comments

Comments
 (0)