Skip to content

Commit 27b1620

Browse files
committed
Use SIMD intrinsics for test{z,c} intrinsics
1 parent 13410d0 commit 27b1620

File tree

2 files changed

+35
-24
lines changed

2 files changed

+35
-24
lines changed

crates/core_arch/src/x86/avx.rs

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,7 +1983,10 @@ pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
19831983
#[cfg_attr(test, assert_instr(vptest))]
19841984
#[stable(feature = "simd_x86", since = "1.27.0")]
19851985
pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
1986-
unsafe { ptestz256(a.as_i64x4(), b.as_i64x4()) }
1986+
unsafe {
1987+
let r = simd_and(a.as_i64x4(), b.as_i64x4());
1988+
(0i64 == simd_reduce_or(r)) as i32
1989+
}
19871990
}
19881991

19891992
/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -1997,7 +2000,10 @@ pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
19972000
#[cfg_attr(test, assert_instr(vptest))]
19982001
#[stable(feature = "simd_x86", since = "1.27.0")]
19992002
pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
2000-
unsafe { ptestc256(a.as_i64x4(), b.as_i64x4()) }
2003+
unsafe {
2004+
let r = simd_and(simd_xor(a.as_i64x4(), i64x4::splat(!0)), b.as_i64x4());
2005+
(0i64 == simd_reduce_or(r)) as i32
2006+
}
20012007
}
20022008

20032009
/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -2081,7 +2087,10 @@ pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
20812087
#[cfg_attr(test, assert_instr(vtestpd))]
20822088
#[stable(feature = "simd_x86", since = "1.27.0")]
20832089
pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
2084-
unsafe { vtestzpd(a, b) }
2090+
unsafe {
2091+
let r: i64x2 = simd_lt(transmute(_mm_and_pd(a, b)), i64x2::ZERO);
2092+
(0i64 == simd_reduce_or(r)) as i32
2093+
}
20852094
}
20862095

20872096
/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
@@ -2098,7 +2107,10 @@ pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
20982107
#[cfg_attr(test, assert_instr(vtestpd))]
20992108
#[stable(feature = "simd_x86", since = "1.27.0")]
21002109
pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
2101-
unsafe { vtestcpd(a, b) }
2110+
unsafe {
2111+
let r: i64x2 = simd_lt(transmute(_mm_andnot_pd(a, b)), i64x2::ZERO);
2112+
(0i64 == simd_reduce_or(r)) as i32
2113+
}
21022114
}
21032115

21042116
/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
@@ -2185,7 +2197,10 @@ pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
21852197
#[cfg_attr(test, assert_instr(vtestps))]
21862198
#[stable(feature = "simd_x86", since = "1.27.0")]
21872199
pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
2188-
unsafe { vtestzps(a, b) }
2200+
unsafe {
2201+
let r: i32x4 = simd_lt(transmute(_mm_and_ps(a, b)), i32x4::ZERO);
2202+
(0i32 == simd_reduce_or(r)) as i32
2203+
}
21892204
}
21902205

21912206
/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
@@ -2202,7 +2217,10 @@ pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
22022217
#[cfg_attr(test, assert_instr(vtestps))]
22032218
#[stable(feature = "simd_x86", since = "1.27.0")]
22042219
pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
2205-
unsafe { vtestcps(a, b) }
2220+
unsafe {
2221+
let r: i32x4 = simd_lt(transmute(_mm_andnot_ps(a, b)), i32x4::ZERO);
2222+
(0i32 == simd_reduce_or(r)) as i32
2223+
}
22062224
}
22072225

22082226
/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
@@ -3148,10 +3166,6 @@ unsafe extern "C" {
31483166
fn vrcpps(a: __m256) -> __m256;
31493167
#[link_name = "llvm.x86.avx.rsqrt.ps.256"]
31503168
fn vrsqrtps(a: __m256) -> __m256;
3151-
#[link_name = "llvm.x86.avx.ptestz.256"]
3152-
fn ptestz256(a: i64x4, b: i64x4) -> i32;
3153-
#[link_name = "llvm.x86.avx.ptestc.256"]
3154-
fn ptestc256(a: i64x4, b: i64x4) -> i32;
31553169
#[link_name = "llvm.x86.avx.ptestnzc.256"]
31563170
fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
31573171
#[link_name = "llvm.x86.avx.vtestz.pd.256"]
@@ -3160,10 +3174,6 @@ unsafe extern "C" {
31603174
fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
31613175
#[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
31623176
fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3163-
#[link_name = "llvm.x86.avx.vtestz.pd"]
3164-
fn vtestzpd(a: __m128d, b: __m128d) -> i32;
3165-
#[link_name = "llvm.x86.avx.vtestc.pd"]
3166-
fn vtestcpd(a: __m128d, b: __m128d) -> i32;
31673177
#[link_name = "llvm.x86.avx.vtestnzc.pd"]
31683178
fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
31693179
#[link_name = "llvm.x86.avx.vtestz.ps.256"]
@@ -3172,10 +3182,6 @@ unsafe extern "C" {
31723182
fn vtestcps256(a: __m256, b: __m256) -> i32;
31733183
#[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
31743184
fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3175-
#[link_name = "llvm.x86.avx.vtestz.ps"]
3176-
fn vtestzps(a: __m128, b: __m128) -> i32;
3177-
#[link_name = "llvm.x86.avx.vtestc.ps"]
3178-
fn vtestcps(a: __m128, b: __m128) -> i32;
31793185
#[link_name = "llvm.x86.avx.vtestnzc.ps"]
31803186
fn vtestnzcps(a: __m128, b: __m128) -> i32;
31813187
#[link_name = "llvm.x86.avx.min.ps.256"]

crates/core_arch/src/x86/sse41.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,10 @@ pub fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
10061006
#[cfg_attr(test, assert_instr(ptest))]
10071007
#[stable(feature = "simd_x86", since = "1.27.0")]
10081008
pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
1009-
unsafe { ptestz(a.as_i64x2(), mask.as_i64x2()) }
1009+
unsafe {
1010+
let r = simd_reduce_or(simd_and(a.as_i64x2(), mask.as_i64x2()));
1011+
(0i64 == r) as i32
1012+
}
10101013
}
10111014

10121015
/// Tests whether the specified bits in a 128-bit integer vector are all
@@ -1029,7 +1032,13 @@ pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
10291032
#[cfg_attr(test, assert_instr(ptest))]
10301033
#[stable(feature = "simd_x86", since = "1.27.0")]
10311034
pub fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
1032-
unsafe { ptestc(a.as_i64x2(), mask.as_i64x2()) }
1035+
unsafe {
1036+
let r = simd_reduce_or(simd_and(
1037+
simd_xor(a.as_i64x2(), i64x2::splat(!0)),
1038+
mask.as_i64x2(),
1039+
));
1040+
(0i64 == r) as i32
1041+
}
10331042
}
10341043

10351044
/// Tests whether the specified bits in a 128-bit integer vector are
@@ -1165,10 +1174,6 @@ unsafe extern "C" {
11651174
fn phminposuw(a: u16x8) -> u16x8;
11661175
#[link_name = "llvm.x86.sse41.mpsadbw"]
11671176
fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
1168-
#[link_name = "llvm.x86.sse41.ptestz"]
1169-
fn ptestz(a: i64x2, mask: i64x2) -> i32;
1170-
#[link_name = "llvm.x86.sse41.ptestc"]
1171-
fn ptestc(a: i64x2, mask: i64x2) -> i32;
11721177
#[link_name = "llvm.x86.sse41.ptestnzc"]
11731178
fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
11741179
}

0 commit comments

Comments
 (0)