Skip to content

Commit e6d8c96

Browse files
committed
Merge branch 'main' into kfd/WaveActiveBitOr
2 parents 165e9bc + 2e424de commit e6d8c96

File tree

9 files changed

+250
-64
lines changed

9 files changed

+250
-64
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -461,11 +461,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
461461
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
462462
}
463463

464-
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
464+
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
465465
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
466466
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
467467
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
468468
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
469+
}
470+
471+
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
469472
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
470473
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
471474
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -2338,15 +2341,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
23382341
def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
23392342
def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">;
23402343
}
2341-
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
2342-
def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
2343-
def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
2344-
}
2345-
2346-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2347-
def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
2348-
def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
2349-
}
23502344

23512345
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
23522346
def rndscalesd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int, _Constant int)">;
@@ -2439,6 +2433,14 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
24392433
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
24402434
def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
24412435
def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
2436+
def vpermilpd512
2437+
: X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
2438+
def vpermilps512
2439+
: X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
2440+
def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, "
2441+
"_Vector<8, long long int>)">;
2442+
def vpermilvarps512
2443+
: X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
24422444
}
24432445

24442446
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4653,6 +4653,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
46534653
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
46544654
});
46554655

4656+
case X86::BI__builtin_ia32_vpermilvarpd:
4657+
case X86::BI__builtin_ia32_vpermilvarpd256:
4658+
case X86::BI__builtin_ia32_vpermilvarpd512:
4659+
return interp__builtin_ia32_shuffle_generic(
4660+
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
4661+
unsigned NumElemPerLane = 2;
4662+
unsigned Lane = DstIdx / NumElemPerLane;
4663+
unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
4664+
return std::make_pair(
4665+
0, static_cast<int>(Lane * NumElemPerLane + Offset));
4666+
});
4667+
4668+
case X86::BI__builtin_ia32_vpermilvarps:
4669+
case X86::BI__builtin_ia32_vpermilvarps256:
4670+
case X86::BI__builtin_ia32_vpermilvarps512:
4671+
return interp__builtin_ia32_shuffle_generic(
4672+
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
4673+
unsigned NumElemPerLane = 4;
4674+
unsigned Lane = DstIdx / NumElemPerLane;
4675+
unsigned Offset = ShuffleMask & 0b11;
4676+
return std::make_pair(
4677+
0, static_cast<int>(Lane * NumElemPerLane + Offset));
4678+
});
4679+
46564680
case X86::BI__builtin_ia32_vpermilpd:
46574681
case X86::BI__builtin_ia32_vpermilpd256:
46584682
case X86::BI__builtin_ia32_vpermilpd512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13043,6 +13043,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1304313043
return Success(R, E);
1304413044
}
1304513045

13046+
case X86::BI__builtin_ia32_vpermilvarpd:
13047+
case X86::BI__builtin_ia32_vpermilvarpd256:
13048+
case X86::BI__builtin_ia32_vpermilvarpd512: {
13049+
APValue R;
13050+
if (!evalShuffleGeneric(
13051+
Info, E, R,
13052+
[](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
13053+
unsigned NumElemPerLane = 2;
13054+
unsigned Lane = DstIdx / NumElemPerLane;
13055+
unsigned Offset = Mask & 0b10 ? 1 : 0;
13056+
return std::make_pair(
13057+
0, static_cast<int>(Lane * NumElemPerLane + Offset));
13058+
}))
13059+
return false;
13060+
return Success(R, E);
13061+
}
13062+
1304613063
case X86::BI__builtin_ia32_vpermilpd:
1304713064
case X86::BI__builtin_ia32_vpermilpd256:
1304813065
case X86::BI__builtin_ia32_vpermilpd512: {
@@ -13062,6 +13079,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1306213079
return Success(R, E);
1306313080
}
1306413081

13082+
case X86::BI__builtin_ia32_vpermilvarps:
13083+
case X86::BI__builtin_ia32_vpermilvarps256:
13084+
case X86::BI__builtin_ia32_vpermilvarps512: {
13085+
APValue R;
13086+
if (!evalShuffleGeneric(
13087+
Info, E, R,
13088+
[](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
13089+
unsigned NumElemPerLane = 4;
13090+
unsigned Lane = DstIdx / NumElemPerLane;
13091+
unsigned Offset = Mask & 0b11;
13092+
return std::make_pair(
13093+
0, static_cast<int>(Lane * NumElemPerLane + Offset));
13094+
}))
13095+
return false;
13096+
return Success(R, E);
13097+
}
13098+
1306513099
case X86::BI__builtin_ia32_phminposuw128: {
1306613100
APValue Source;
1306713101
if (!Evaluate(Source, Info, E->getArg(0)))

clang/lib/Headers/avx512fintrin.h

Lines changed: 12 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -5879,45 +5879,39 @@ _mm_cvttss_u64 (__m128 __A)
58795879
(__v16sf)_mm512_permute_ps((X), (C)), \
58805880
(__v16sf)_mm512_setzero_ps()))
58815881

5882-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
5883-
_mm512_permutevar_pd(__m512d __A, __m512i __C)
5884-
{
5882+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5883+
_mm512_permutevar_pd(__m512d __A, __m512i __C) {
58855884
return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
58865885
}
58875886

5888-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
5889-
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5890-
{
5887+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5888+
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
58915889
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
58925890
(__v8df)_mm512_permutevar_pd(__A, __C),
58935891
(__v8df)__W);
58945892
}
58955893

5896-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
5897-
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
5898-
{
5894+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5895+
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
58995896
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
59005897
(__v8df)_mm512_permutevar_pd(__A, __C),
59015898
(__v8df)_mm512_setzero_pd());
59025899
}
59035900

5904-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
5905-
_mm512_permutevar_ps(__m512 __A, __m512i __C)
5906-
{
5901+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5902+
_mm512_permutevar_ps(__m512 __A, __m512i __C) {
59075903
return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
59085904
}
59095905

5910-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
5911-
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5912-
{
5906+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5907+
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
59135908
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
59145909
(__v16sf)_mm512_permutevar_ps(__A, __C),
59155910
(__v16sf)__W);
59165911
}
59175912

5918-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
5919-
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
5920-
{
5913+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5914+
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
59215915
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
59225916
(__v16sf)_mm512_permutevar_ps(__A, __C),
59235917
(__v16sf)_mm512_setzero_ps());

clang/lib/Headers/avx512vlintrin.h

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -5847,65 +5847,57 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
58475847
(__v8sf)_mm256_permute_ps((X), (C)), \
58485848
(__v8sf)_mm256_setzero_ps()))
58495849

5850-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
5851-
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
5852-
{
5850+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5851+
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
58535852
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
58545853
(__v2df)_mm_permutevar_pd(__A, __C),
58555854
(__v2df)__W);
58565855
}
58575856

5858-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
5859-
_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
5860-
{
5857+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5858+
_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
58615859
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
58625860
(__v2df)_mm_permutevar_pd(__A, __C),
58635861
(__v2df)_mm_setzero_pd());
58645862
}
58655863

5866-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
5867-
_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
5868-
{
5864+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5865+
_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) {
58695866
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
58705867
(__v4df)_mm256_permutevar_pd(__A, __C),
58715868
(__v4df)__W);
58725869
}
58735870

5874-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
5875-
_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
5876-
{
5871+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5872+
_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
58775873
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
58785874
(__v4df)_mm256_permutevar_pd(__A, __C),
58795875
(__v4df)_mm256_setzero_pd());
58805876
}
58815877

5882-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
5883-
_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
5884-
{
5878+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5879+
_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
58855880
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
58865881
(__v4sf)_mm_permutevar_ps(__A, __C),
58875882
(__v4sf)__W);
58885883
}
58895884

5890-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
5891-
_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
5892-
{
5885+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5886+
_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
58935887
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
58945888
(__v4sf)_mm_permutevar_ps(__A, __C),
58955889
(__v4sf)_mm_setzero_ps());
58965890
}
58975891

5898-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
5899-
_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
5900-
{
5892+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5893+
_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
59015894
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
59025895
(__v8sf)_mm256_permutevar_ps(__A, __C),
59035896
(__v8sf)__W);
59045897
}
59055898

5906-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
5907-
_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
5908-
{
5899+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5900+
_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
59095901
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
59105902
(__v8sf)_mm256_permutevar_ps(__A, __C),
59115903
(__v8sf)_mm256_setzero_ps());

clang/lib/Headers/avxintrin.h

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -787,9 +787,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
787787
/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
788788
/// returned vector.
789789
/// \returns A 128-bit vector of [2 x double] containing the copied values.
790-
static __inline __m128d __DEFAULT_FN_ATTRS128
791-
_mm_permutevar_pd(__m128d __a, __m128i __c)
792-
{
790+
static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
791+
_mm_permutevar_pd(__m128d __a, __m128i __c) {
793792
return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
794793
}
795794

@@ -826,9 +825,8 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
826825
/// 1: Bits [255:192] of the source are copied to bits [255:192] of the
827826
/// returned vector.
828827
/// \returns A 256-bit vector of [4 x double] containing the copied values.
829-
static __inline __m256d __DEFAULT_FN_ATTRS
830-
_mm256_permutevar_pd(__m256d __a, __m256i __c)
831-
{
828+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
829+
_mm256_permutevar_pd(__m256d __a, __m256i __c) {
832830
return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
833831
}
834832

@@ -881,9 +879,8 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
881879
/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
882880
/// returned vector.
883881
/// \returns A 128-bit vector of [4 x float] containing the copied values.
884-
static __inline __m128 __DEFAULT_FN_ATTRS128
885-
_mm_permutevar_ps(__m128 __a, __m128i __c)
886-
{
882+
static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
883+
_mm_permutevar_ps(__m128 __a, __m128i __c) {
887884
return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
888885
}
889886

@@ -972,9 +969,8 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
972969
/// 11: Bits [255:224] of the source are copied to bits [255:224] of the
973970
/// returned vector.
974971
/// \returns A 256-bit vector of [8 x float] containing the copied values.
975-
static __inline __m256 __DEFAULT_FN_ATTRS
976-
_mm256_permutevar_ps(__m256 __a, __m256i __c)
977-
{
972+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
973+
_mm256_permutevar_ps(__m256 __a, __m256i __c) {
978974
return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
979975
}
980976

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1454,24 +1454,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
14541454
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}})
14551455
return _mm_permutevar_pd(A, B);
14561456
}
1457+
TEST_CONSTEXPR(match_m128d(
1458+
_mm_permutevar_pd(
1459+
((__m128d){0.0, 1.0}),
1460+
((__m128i){0b10, 0b00})
1461+
),
1462+
1.0, 0.0
1463+
));
14571464

14581465
__m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
14591466
// CHECK-LABEL: test_mm256_permutevar_pd
14601467
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}})
14611468
return _mm256_permutevar_pd(A, B);
14621469
}
1470+
TEST_CONSTEXPR(match_m256d(
1471+
_mm256_permutevar_pd(
1472+
((__m256d){0.0, 1.0, 2.0, 3.0}),
1473+
((__m256i){0b10, 0b00, 0b00, 0b10})
1474+
),
1475+
1.0, 0.0, 2.0, 3.0
1476+
));
14631477

14641478
__m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
14651479
// CHECK-LABEL: test_mm_permutevar_ps
14661480
// CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}})
14671481
return _mm_permutevar_ps(A, B);
14681482
}
1483+
TEST_CONSTEXPR(match_m128(
1484+
_mm_permutevar_ps(
1485+
((__m128){0.0, 1.0, 2.0, 3.0}),
1486+
((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
1487+
),
1488+
3.0, 2.0, 1.0, 0.0
1489+
));
14691490

14701491
__m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
14711492
// CHECK-LABEL: test_mm256_permutevar_ps
14721493
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}})
14731494
return _mm256_permutevar_ps(A, B);
14741495
}
1496+
TEST_CONSTEXPR(match_m256(
1497+
_mm256_permutevar_ps(
1498+
((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
1499+
((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10})
1500+
),
1501+
3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0
1502+
));
14751503

14761504
__m256 test_mm256_rcp_ps(__m256 A) {
14771505
// CHECK-LABEL: test_mm256_rcp_ps

0 commit comments

Comments
 (0)