Merged
40 changes: 30 additions & 10 deletions crates/core_arch/src/x86/avx2.rs
@@ -2778,7 +2778,7 @@ pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shl(a.as_u32x4(), count.as_u32x4())) }
unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 32-bit integers in `a` left by the amount
@@ -2791,7 +2791,7 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shl(a.as_u32x8(), count.as_u32x8())) }
unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
}

/// Shifts packed 64-bit integers in `a` left by the amount
@@ -2804,7 +2804,7 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shl(a.as_u64x2(), count.as_u64x2())) }
unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
}

/// Shifts packed 64-bit integers in `a` left by the amount
@@ -2817,7 +2817,7 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shl(a.as_u64x4(), count.as_u64x4())) }
unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
}

/// Shifts packed 16-bit integers in `a` right by `count` while
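The `sllv` changes above route the per-lane left shifts through the dedicated LLVM intrinsics. A minimal usage sketch, not part of this PR (the helper name `demo_sllv_epi32` and the lane values are made up), showing per-lane counts including one count that is out of range for a 32-bit lane:

```rust
// Illustrative only: exercise _mm_sllv_epi32 with per-lane shift counts.
#[cfg(target_arch = "x86_64")]
fn demo_sllv_epi32() {
    use std::arch::x86_64::*;

    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was checked at runtime just above.
        unsafe {
            let a = _mm_set_epi32(8, 4, 2, 1); // lanes, low to high: 1, 2, 4, 8
            let count = _mm_set_epi32(32, 2, 1, 0); // lanes, low to high: 0, 1, 2, 32
            let r = _mm_sllv_epi32(a, count);

            let mut out = [0i32; 4];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            // vpsllvd zeroes any lane whose count is >= 32, so the 8 << 32 lane becomes 0.
            assert_eq!(out, [1, 4, 16, 0]);
        }
    }
}
```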
@@ -2881,7 +2881,7 @@ pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shr(a.as_i32x4(), count.as_i32x4())) }
unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by the
@@ -2893,7 +2893,7 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shr(a.as_i32x8(), count.as_i32x8())) }
unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
}

/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
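For the arithmetic variant, `vpsravd` fills a lane with copies of its sign bit when that lane's count is 32 or more. A hedged sketch, again with an invented helper name and lane values:

```rust
// Illustrative only: per-lane arithmetic right shift, including out-of-range counts.
#[cfg(target_arch = "x86_64")]
fn demo_srav_epi32() {
    use std::arch::x86_64::*;

    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was checked at runtime just above.
        unsafe {
            let a = _mm256_set_epi32(-64, 64, -8, 8, -2, 2, -1, 1);
            let count = _mm256_set_epi32(99, 99, 3, 3, 1, 1, 0, 0);
            let r = _mm256_srav_epi32(a, count);

            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            // Lanes with count >= 32 collapse to 0 (non-negative input) or -1 (negative input).
            assert_eq!(out, [1, -1, 1, -1, 1, -1, 0, -1]);
        }
    }
}
```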
@@ -3076,7 +3076,7 @@ pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shr(a.as_u32x4(), count.as_u32x4())) }
unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by
@@ -3088,7 +3088,7 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shr(a.as_u32x8(), count.as_u32x8())) }
unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
}

/// Shifts packed 64-bit integers in `a` right by the amount specified by
@@ -3100,7 +3100,7 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shr(a.as_u64x2(), count.as_u64x2())) }
unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
}

/// Shifts packed 64-bit integers in `a` right by the amount specified by
@@ -3112,7 +3112,7 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shr(a.as_u64x4(), count.as_u64x4())) }
unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
}

/// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
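The logical right shifts behave the same way at 64-bit width: `vpsrlvq` zeroes a lane whose count is 64 or more. A small illustrative sketch with made-up values (not part of this PR):

```rust
// Illustrative only: per-lane logical right shift on 64-bit lanes.
#[cfg(target_arch = "x86_64")]
fn demo_srlv_epi64() {
    use std::arch::x86_64::*;

    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was checked at runtime just above.
        unsafe {
            let a = _mm_set_epi64x(-1, 256); // lanes, low to high: 256, -1 (all bits set)
            let count = _mm_set_epi64x(64, 4); // lanes, low to high: 4, 64
            let r = _mm_srlv_epi64(a, count);

            let mut out = [0i64; 2];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            // Logical shift: 256 >> 4 = 16; the high lane's count of 64 yields 0.
            assert_eq!(out, [16, 0]);
        }
    }
}
```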
@@ -3687,16 +3687,36 @@ unsafe extern "C" {
fn pslld(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psll.q"]
fn psllq(a: i64x4, count: i64x2) -> i64x4;
#[link_name = "llvm.x86.avx2.psllv.d"]
fn psllvd(a: i32x4, count: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.psllv.d.256"]
fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psllv.q"]
fn psllvq(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.psllv.q.256"]
fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.psra.w"]
fn psraw(a: i16x16, count: i16x8) -> i16x16;
#[link_name = "llvm.x86.avx2.psra.d"]
fn psrad(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psrav.d"]
fn psravd(a: i32x4, count: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.psrav.d.256"]
fn psravd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psrl.w"]
fn psrlw(a: i16x16, count: i16x8) -> i16x16;
#[link_name = "llvm.x86.avx2.psrl.d"]
fn psrld(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psrl.q"]
fn psrlq(a: i64x4, count: i64x2) -> i64x4;
#[link_name = "llvm.x86.avx2.psrlv.d"]
fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.psrlv.d.256"]
fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psrlv.q"]
fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.psrlv.q.256"]
fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.pshuf.b"]
fn pshufb(a: u8x32, b: u8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.permd"]
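Taken together, the new `psllv*`/`psrlv*`/`psrav*` bindings give each lane the architecturally defined result for out-of-range counts, presumably the motivation for moving off the generic `simd_shl`/`simd_shr` operations. As a rough scalar reference model of that behaviour for one 32-bit lane (my reading of the instruction semantics, not code from this PR):

```rust
// Scalar reference model, illustrative only, for a single 32-bit lane.
fn sllv_lane(a: u32, count: u32) -> u32 {
    // vpsllvd: a count >= 32 produces 0 (a plain Rust `<<` with such a count would
    // be a shift overflow and panic in debug builds).
    if count >= 32 { 0 } else { a << count }
}

fn srlv_lane(a: u32, count: u32) -> u32 {
    // vpsrlvd: logical shift; a count >= 32 produces 0.
    if count >= 32 { 0 } else { a >> count }
}

fn srav_lane(a: i32, count: u32) -> i32 {
    // vpsravd: arithmetic shift; a count >= 32 fills the lane with the sign bit.
    if count >= 32 { a >> 31 } else { a >> count }
}
```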
39 changes: 30 additions & 9 deletions crates/core_arch/src/x86/avx512bw.rs
@@ -6852,7 +6852,7 @@ pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(simd_shl(a.as_u16x32(), count.as_u16x32())) }
unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6891,7 +6891,7 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shl(a.as_u16x16(), count.as_u16x16())) }
unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6930,7 +6930,7 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shl(a.as_u16x8(), count.as_u16x8())) }
unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
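The 16-bit forms come from AVX-512BW, and the 128-bit/256-bit forms additionally need AVX-512VL. A usage sketch under those assumptions (helper name and values are invented; runtime detection is one way to guard the call):

```rust
// Illustrative only: per-lane left shift on 16-bit lanes.
#[cfg(target_arch = "x86_64")]
fn demo_sllv_epi16() {
    use std::arch::x86_64::*;

    if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
        // SAFETY: the required AVX-512 features were checked at runtime just above.
        unsafe {
            let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); // lanes, low to high: 1..=8
            let count = _mm_set_epi16(16, 6, 5, 4, 3, 2, 1, 0); // low to high: 0..=6, then 16
            let r = _mm_sllv_epi16(a, count);

            let mut out = [0i16; 8];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            // vpsllvw zeroes any lane whose count is >= 16, so the 8 << 16 lane becomes 0.
            assert_eq!(out, [1, 4, 12, 32, 80, 192, 448, 0]);
        }
    }
}
```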
@@ -7188,7 +7188,7 @@ pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(simd_shr(a.as_u16x32(), count.as_u16x32())) }
unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7227,7 +7227,7 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shr(a.as_u16x16(), count.as_u16x16())) }
unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7266,7 +7266,7 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shr(a.as_u16x8(), count.as_u16x8())) }
unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7511,7 +7511,7 @@ pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(simd_shr(a.as_i16x32(), count.as_i16x32())) }
unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7550,7 +7550,7 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(simd_shr(a.as_i16x16(), count.as_i16x16())) }
unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7589,7 +7589,7 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(simd_shr(a.as_i16x8(), count.as_i16x8())) }
unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
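To contrast the zero-filling and sign-filling 16-bit right shifts on the same data, a hedged sketch using the 256-bit forms (names and values are illustrative only):

```rust
// Illustrative only: vpsrlvw (logical) versus vpsravw (arithmetic) on 16-bit lanes.
#[cfg(target_arch = "x86_64")]
fn demo_srlv_vs_srav_epi16() {
    use std::arch::x86_64::*;

    if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
        // SAFETY: the required AVX-512 features were checked at runtime just above.
        unsafe {
            let a = _mm256_set1_epi16(-32); // every 16-bit lane holds -32
            let count = _mm256_set_epi16(
                16, 16, 7, 6, 5, 4, 3, 2, // high half, counts per lane
                16, 16, 7, 6, 5, 4, 3, 2, // low half, counts per lane
            );
            let logical = _mm256_srlv_epi16(a, count); // vpsrlvw: shifts in zeros
            let arithmetic = _mm256_srav_epi16(a, count); // vpsravw: shifts in sign bits

            let (mut l, mut s) = ([0i16; 16], [0i16; 16]);
            _mm256_storeu_si256(l.as_mut_ptr() as *mut __m256i, logical);
            _mm256_storeu_si256(s.as_mut_ptr() as *mut __m256i, arithmetic);

            // Out-of-range counts (>= 16) give 0 for the logical shift but -1 for the
            // arithmetic shift of a negative lane.
            assert_eq!(l[14], 0);
            assert_eq!(s[14], -1);
            // In-range counts: the logical shift treats -32 as 0xFFE0, the arithmetic
            // shift keeps the sign.
            assert_eq!(s[0], -8); // -32 >> 2, arithmetic
            assert_eq!(l[0], (0xFFE0u16 >> 2) as i16);
        }
    }
}
```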
@@ -11645,12 +11645,33 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx512.psll.w.512"]
fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

#[link_name = "llvm.x86.avx512.psllv.w.512"]
fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.psllv.w.256"]
fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx512.psllv.w.128"]
fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;

#[link_name = "llvm.x86.avx512.psrl.w.512"]
fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

#[link_name = "llvm.x86.avx512.psrlv.w.512"]
fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.psrlv.w.256"]
fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx512.psrlv.w.128"]
fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;

#[link_name = "llvm.x86.avx512.psra.w.512"]
fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

#[link_name = "llvm.x86.avx512.psrav.w.512"]
fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.psrav.w.256"]
fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx512.psrav.w.128"]
fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;

#[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]