
Commit 575823d

Use SIMD intrinsics for madd, hadd and hsub intrinsics
1 parent 401a003 commit 575823d

6 files changed: +138 additions, -61 deletions

crates/core_arch/src/x86/avx.rs

Lines changed: 20 additions & 12 deletions
@@ -587,7 +587,11 @@ pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vhaddpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
-    unsafe { vhaddpd(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let odd = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_add(even, odd)
+    }
 }
 
 /// Horizontal addition of adjacent pairs in the two packed vectors
@@ -602,7 +606,11 @@ pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vhaddps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
-    unsafe { vhaddps(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
+        let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
+        simd_add(even, odd)
+    }
 }
 
 /// Horizontal subtraction of adjacent pairs in the two packed vectors
@@ -616,7 +624,11 @@ pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vhsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
-    unsafe { vhsubpd(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let odd = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_sub(even, odd)
+    }
 }
 
 /// Horizontal subtraction of adjacent pairs in the two packed vectors
@@ -631,7 +643,11 @@ pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vhsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
-    unsafe { vhsubps(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
+        let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
+        simd_sub(even, odd)
+    }
 }
 
 /// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
@@ -3044,14 +3060,6 @@ unsafe extern "C" {
     fn roundps256(a: __m256, b: i32) -> __m256;
     #[link_name = "llvm.x86.avx.dp.ps.256"]
     fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
-    #[link_name = "llvm.x86.avx.hadd.pd.256"]
-    fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
-    #[link_name = "llvm.x86.avx.hadd.ps.256"]
-    fn vhaddps(a: __m256, b: __m256) -> __m256;
-    #[link_name = "llvm.x86.avx.hsub.pd.256"]
-    fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
-    #[link_name = "llvm.x86.avx.hsub.ps.256"]
-    fn vhsubps(a: __m256, b: __m256) -> __m256;
     #[link_name = "llvm.x86.sse2.cmp.pd"]
     fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
     #[link_name = "llvm.x86.avx.cmp.pd.256"]

crates/core_arch/src/x86/avx2.rs

Lines changed: 50 additions & 15 deletions
@@ -891,7 +891,21 @@ pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
 #[cfg_attr(test, assert_instr(vphaddw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) }
+    let a = a.as_i16x16();
+    let b = b.as_i16x16();
+    unsafe {
+        let even: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
+        );
+        let odd: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
+        );
+        simd_add(even, odd).as_m256i()
+    }
 }
 
 /// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
@@ -902,7 +916,13 @@ pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vphaddd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) }
+    let a = a.as_i32x8();
+    let b = b.as_i32x8();
+    unsafe {
+        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
+        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
+        simd_add(even, odd).as_m256i()
+    }
 }
 
 /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
@@ -925,7 +945,21 @@ pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vphsubw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) }
+    let a = a.as_i16x16();
+    let b = b.as_i16x16();
+    unsafe {
+        let even: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
+        );
+        let odd: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
+        );
+        simd_sub(even, odd).as_m256i()
+    }
 }
 
 /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
@@ -936,7 +970,13 @@ pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vphsubd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) }
+    let a = a.as_i32x8();
+    let b = b.as_i32x8();
+    unsafe {
+        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
+        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
+        simd_sub(even, odd).as_m256i()
+    }
 }
 
 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
@@ -1714,7 +1754,12 @@ pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m25
 #[cfg_attr(test, assert_instr(vpmaddwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
+    unsafe {
+        let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16()));
+        let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let odd: i32x8 = simd_shuffle!(r, r, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_add(even, odd).as_m256i()
+    }
 }
 
 /// Vertically multiplies each unsigned 8-bit integer from `a` with the
@@ -3594,20 +3639,10 @@ pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
 
 #[allow(improper_ctypes)]
 unsafe extern "C" {
-    #[link_name = "llvm.x86.avx2.phadd.w"]
-    fn phaddw(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx2.phadd.d"]
-    fn phaddd(a: i32x8, b: i32x8) -> i32x8;
     #[link_name = "llvm.x86.avx2.phadd.sw"]
     fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx2.phsub.w"]
-    fn phsubw(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx2.phsub.d"]
-    fn phsubd(a: i32x8, b: i32x8) -> i32x8;
     #[link_name = "llvm.x86.avx2.phsub.sw"]
     fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx2.pmadd.wd"]
-    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
     #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
     fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
     #[link_name = "llvm.x86.avx2.maskload.d"]

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 14 additions & 3 deletions
@@ -5835,7 +5835,20 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaddwd))]
 pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
+    unsafe {
+        let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32()));
+        let even: i32x16 = simd_shuffle!(
+            r,
+            r,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        let odd: i32x16 = simd_shuffle!(
+            r,
+            r,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        simd_add(even, odd).as_m512i()
+    }
 }
 
 /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -11617,8 +11630,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
     fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
 
-    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
-    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
     #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
     fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
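The widen-multiply-then-pairwise-add pattern is identical across the 128-, 256- and 512-bit madd lowerings; the retained `assert_instr(vpmaddwd)` tests presumably rely on LLVM recognizing it and emitting a single instruction. A width-generic scalar sketch (hypothetical, not from the commit):

```rust
// Hypothetical scalar model of the madd lowering shared by the
// SSE2/AVX2/AVX-512 variants: sign-extend i16 to i32, multiply,
// then add adjacent products.
fn madd_model(a: &[i16], b: &[i16]) -> Vec<i32> {
    assert_eq!(a.len(), b.len());
    a.chunks_exact(2)
        .zip(b.chunks_exact(2))
        .map(|(pa, pb)| {
            let lo = pa[0] as i32 * pb[0] as i32;
            let hi = pa[1] as i32 * pb[1] as i32;
            // Each product fits in i32; only 2 * (-32768)^2 can overflow
            // the sum. vpmaddwd wraps there, and so does the vector add.
            lo.wrapping_add(hi)
        })
        .collect()
}

fn main() {
    assert_eq!(madd_model(&[1, 2, 3, 4], &[5, 6, 7, 8]), vec![17, 53]);
    // the single overflow case wraps to i32::MIN
    assert_eq!(madd_model(&[i16::MIN; 2], &[i16::MIN; 2]), vec![i32::MIN]);
}
```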

crates/core_arch/src/x86/sse2.rs

Lines changed: 6 additions & 3 deletions
@@ -201,7 +201,12 @@ pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmaddwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
+    unsafe {
+        let r: i32x8 = simd_mul(simd_cast(a.as_i16x8()), simd_cast(b.as_i16x8()));
+        let even: i32x4 = simd_shuffle!(r, r, [0, 2, 4, 6]);
+        let odd: i32x4 = simd_shuffle!(r, r, [1, 3, 5, 7]);
+        simd_add(even, odd).as_m128i()
+    }
 }
 
 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -3043,8 +3048,6 @@ unsafe extern "C" {
     fn lfence();
     #[link_name = "llvm.x86.sse2.mfence"]
     fn mfence();
-    #[link_name = "llvm.x86.sse2.pmadd.wd"]
-    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
     #[link_name = "llvm.x86.sse2.psad.bw"]
     fn psadbw(a: u8x16, b: u8x16) -> u64x2;
     #[link_name = "llvm.x86.sse2.psll.w"]

crates/core_arch/src/x86/sse3.rs

Lines changed: 20 additions & 12 deletions
@@ -51,7 +51,11 @@ pub fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(haddpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
-    unsafe { haddpd(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 2]);
+        let odd = simd_shuffle!(a, b, [1, 3]);
+        simd_add(even, odd)
+    }
 }
 
 /// Horizontally adds adjacent pairs of single-precision (32-bit)
@@ -63,7 +67,11 @@ pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(haddps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
-    unsafe { haddps(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let odd = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_add(even, odd)
+    }
 }
 
 /// Horizontally subtract adjacent pairs of double-precision (64-bit)
@@ -75,7 +83,11 @@ pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(hsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
-    unsafe { hsubpd(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 2]);
+        let odd = simd_shuffle!(a, b, [1, 3]);
+        simd_sub(even, odd)
+    }
 }
 
 /// Horizontally adds adjacent pairs of single-precision (32-bit)
@@ -87,7 +99,11 @@ pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(hsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
-    unsafe { hsubps(a, b) }
+    unsafe {
+        let even = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let odd = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_sub(even, odd)
+    }
 }
 
 /// Loads 128-bits of integer data from unaligned memory.
@@ -153,14 +169,6 @@ pub fn _mm_moveldup_ps(a: __m128) -> __m128 {
 
 #[allow(improper_ctypes)]
 unsafe extern "C" {
-    #[link_name = "llvm.x86.sse3.hadd.pd"]
-    fn haddpd(a: __m128d, b: __m128d) -> __m128d;
-    #[link_name = "llvm.x86.sse3.hadd.ps"]
-    fn haddps(a: __m128, b: __m128) -> __m128;
-    #[link_name = "llvm.x86.sse3.hsub.pd"]
-    fn hsubpd(a: __m128d, b: __m128d) -> __m128d;
-    #[link_name = "llvm.x86.sse3.hsub.ps"]
-    fn hsubps(a: __m128, b: __m128) -> __m128;
     #[link_name = "llvm.x86.sse3.ldu.dq"]
     fn lddqu(mem_addr: *const i8) -> i8x16;
 }
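The 128-bit case is the simplest instance of the pattern: there are no lanes to worry about, so `even` is `[a0, b0]` and `odd` is `[a1, b1]`. A usage sketch (not part of the commit) showing the resulting semantics:

```rust
// Hypothetical example: _mm_hadd_pd computes [a0+a1, b0+b1], matching
// the [0, 2] / [1, 3] shuffles above. Assumes x86_64 with runtime
// SSE3 detection.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse3")]
unsafe fn hadd_pd(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
    use std::arch::x86_64::*;
    let r = _mm_hadd_pd(_mm_loadu_pd(a.as_ptr()), _mm_loadu_pd(b.as_ptr()));
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    out
}

fn main() {
    #[cfg(target_arch = "x86_64")]
    if is_x86_feature_detected!("sse3") {
        assert_eq!(unsafe { hadd_pd([1.0, 2.0], [30.0, 40.0]) }, [3.0, 70.0]);
    }
}
```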

crates/core_arch/src/x86/ssse3.rs

Lines changed: 28 additions & 16 deletions
@@ -164,7 +164,13 @@ pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(phaddw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) }
+    let a = a.as_i16x8();
+    let b = b.as_i16x8();
+    unsafe {
+        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_add(even, odd).as_m128i()
+    }
 }
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -189,7 +195,13 @@ pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(phaddd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) }
+    let a = a.as_i32x4();
+    let b = b.as_i32x4();
+    unsafe {
+        let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_add(even, odd).as_m128i()
+    }
 }
 
 /// Horizontally subtract the adjacent pairs of values contained in 2
@@ -201,7 +213,13 @@ pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(phsubw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) }
+    let a = a.as_i16x8();
+    let b = b.as_i16x8();
+    unsafe {
+        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_sub(even, odd).as_m128i()
+    }
 }
 
 /// Horizontally subtract the adjacent pairs of values contained in 2
@@ -227,7 +245,13 @@ pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(phsubd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) }
+    let a = a.as_i32x4();
+    let b = b.as_i32x4();
+    unsafe {
+        let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_sub(even, odd).as_m128i()
+    }
 }
 
 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -305,24 +329,12 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
     fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
 
-    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
-    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
     fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
 
-    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
-    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
-
-    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
-    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
     fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
 
-    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
-    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
-
     #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
     fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
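Note that the saturating variants (`phaddsw`/`phsubsw`) keep their LLVM intrinsics, while the plain variants wrap on overflow — a property the shuffle-plus-`simd_add` lowering preserves, since vector integer addition is two's-complement wrapping. A hypothetical check (not from the commit):

```rust
// Hypothetical example: _mm_hadd_epi16 wraps on overflow, unlike the
// saturating _mm_hadds_epi16. Assumes x86_64 with runtime SSSE3
// detection; SSE2's _mm_extract_epi16 reads the first lane back out.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3")]
unsafe fn first_pair_sum(a0: i16, a1: i16) -> i16 {
    use std::arch::x86_64::*;
    let a = _mm_setr_epi16(a0, a1, 0, 0, 0, 0, 0, 0);
    _mm_extract_epi16::<0>(_mm_hadd_epi16(a, a)) as i16
}

fn main() {
    #[cfg(target_arch = "x86_64")]
    if is_x86_feature_detected!("ssse3") {
        assert_eq!(unsafe { first_pair_sum(i16::MAX, 1) }, i16::MIN); // wraps
    }
}
```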
