@@ -422,8 +422,7 @@ static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
422422/// A 128-bit vector of [4 x bfloat].
/// \returns A 128-bit vector of [4 x float] resulting from the conversion of __A
424424static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps (__m128bh __A ) {
425- return _mm_castsi128_ps (
426- (__m128i )_mm_slli_epi32 ((__m128i )_mm_cvtepi16_epi32 ((__m128i )__A ), 16 ));
425+ return (__m128 )__builtin_convertvector (__A , __v4sf );
427426}
428427
429428/// Convert Packed BF16 Data to Packed float Data.
@@ -434,8 +433,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
434433/// A 128-bit vector of [8 x bfloat].
/// \returns A 256-bit vector of [8 x float] resulting from the conversion of __A
436435static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps (__m128bh __A ) {
437- return _mm256_castsi256_ps ((__m256i )_mm256_slli_epi32 (
438- (__m256i )_mm256_cvtepi16_epi32 ((__m128i )__A ), 16 ));
436+ return (__m256 )__builtin_convertvector (__A , __v8sf );
439437}
440438
441439/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
@@ -450,8 +448,7 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
/// \returns A 128-bit vector of [4 x float] resulting from the conversion of __A
451449static __inline__ __m128 __DEFAULT_FN_ATTRS128
452450_mm_maskz_cvtpbh_ps (__mmask8 __U , __m128bh __A ) {
453- return _mm_castsi128_ps ((__m128i )_mm_slli_epi32 (
454- (__m128i )_mm_maskz_cvtepi16_epi32 ((__mmask8 )__U , (__m128i )__A ), 16 ));
451+ return __mm_maskz_mov_ps (__U , (__m128 )__builtin_convertvector (__A , __v4sf ));
455452}
456453
457454/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
@@ -466,8 +463,7 @@ _mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
/// \returns A 256-bit vector of [8 x float] resulting from the conversion of __A
467464static __inline__ __m256 __DEFAULT_FN_ATTRS256
468465_mm256_maskz_cvtpbh_ps (__mmask8 __U , __m128bh __A ) {
469- return _mm256_castsi256_ps ((__m256i )_mm256_slli_epi32 (
470- (__m256i )_mm256_maskz_cvtepi16_epi32 ((__mmask8 )__U , (__m128i )__A ), 16 ));
466+ return __mm256_maskz_mov_ps (__U , (__m256 )__builtin_convertvector (__A , __v8sf ));
471467}
472468
473469/// Convert Packed BF16 Data to Packed float Data using merging mask.
@@ -485,9 +481,7 @@ _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
/// \returns A 128-bit vector of [4 x float] resulting from the conversion of __A
486482static __inline__ __m128 __DEFAULT_FN_ATTRS128
487483_mm_mask_cvtpbh_ps (__m128 __S , __mmask8 __U , __m128bh __A ) {
488- return _mm_castsi128_ps ((__m128i )_mm_mask_slli_epi32 (
489- (__m128i )__S , (__mmask8 )__U , (__m128i )_mm_cvtepi16_epi32 ((__m128i )__A ),
490- 16 ));
484+ return __mm_mask_mov_ps (__S , __U , (__m128 )__builtin_convertvector (__A , __v4sf ));
491485}
492486
493487/// Convert Packed BF16 Data to Packed float Data using merging mask.
@@ -505,9 +499,7 @@ _mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
/// \returns A 256-bit vector of [8 x float] resulting from the conversion of __A
506500static __inline__ __m256 __DEFAULT_FN_ATTRS256
507501_mm256_mask_cvtpbh_ps (__m256 __S , __mmask8 __U , __m128bh __A ) {
508- return _mm256_castsi256_ps ((__m256i )_mm256_mask_slli_epi32 (
509- (__m256i )__S , (__mmask8 )__U , (__m256i )_mm256_cvtepi16_epi32 ((__m128i )__A ),
510- 16 ));
502+ return __mm256_mask_mov_ps (__S , __U , (__m256 )__builtin_convertvector (__A , __v8sf ));
511503}
512504
513505#undef __DEFAULT_FN_ATTRS128
0 commit comments