Skip to content

Commit 2763963

Browse files
eduardosm authored and Amanieu committed
Reimplement _mm_blendv_pd and _mm256_blendv_pd without LLVM intrinsics
1 parent 935b7de commit 2763963

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

crates/core_arch/src/x86/avx.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -511,7 +511,8 @@ pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
511511
#[cfg_attr(test, assert_instr(vblendvpd))]
512512
#[stable(feature = "simd_x86", since = "1.27.0")]
513513
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
514-
vblendvpd(a, b, c)
514+
let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));
515+
transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
515516
}
516517

517518
/// Blends packed single-precision (32-bit) floating-point elements from
@@ -2914,8 +2915,6 @@ extern "C" {
29142915
fn roundps256(a: __m256, b: i32) -> __m256;
29152916
#[link_name = "llvm.x86.avx.sqrt.ps.256"]
29162917
fn sqrtps256(a: __m256) -> __m256;
2917-
#[link_name = "llvm.x86.avx.blendv.pd.256"]
2918-
fn vblendvpd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
29192918
#[link_name = "llvm.x86.avx.blendv.ps.256"]
29202919
fn vblendvps(a: __m256, b: __m256, c: __m256) -> __m256;
29212920
#[link_name = "llvm.x86.avx.dp.ps.256"]

crates/core_arch/src/x86/sse41.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128
105105
#[cfg_attr(test, assert_instr(blendvpd))]
106106
#[stable(feature = "simd_x86", since = "1.27.0")]
107107
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
108-
blendvpd(a, b, mask)
108+
let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::splat(0));
109+
transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
109110
}
110111

111112
/// Blend packed single-precision (32-bit) floating-point elements from `a`
@@ -1137,8 +1138,6 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
11371138

11381139
#[allow(improper_ctypes)]
11391140
extern "C" {
1140-
#[link_name = "llvm.x86.sse41.blendvpd"]
1141-
fn blendvpd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d;
11421141
#[link_name = "llvm.x86.sse41.blendvps"]
11431142
fn blendvps(a: __m128, b: __m128, mask: __m128) -> __m128;
11441143
#[link_name = "llvm.x86.sse41.blendpd"]

0 commit comments

Comments
 (0)