Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -489,9 +489,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">;
def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">;
def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">;
def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
Expand All @@ -512,6 +509,9 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">;
def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">;
def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">;
def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
Expand Down Expand Up @@ -620,7 +620,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">;
}


Expand Down Expand Up @@ -677,6 +676,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">;
}

let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
Expand Down Expand Up @@ -1078,7 +1078,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf64x4_mask : X86Builtin<"_Vector<4, double>(_Vector<8, double>, _Constant int, _Vector<4, double>, unsigned char)">;
def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">;
}
Expand Down Expand Up @@ -2957,24 +2957,24 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def pmovqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">;
}

let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf32x8_mask : X86Builtin<"_Vector<8, float>(_Vector<16, float>, _Constant int, _Vector<8, float>, unsigned char)">;
def extractf64x2_512_mask : X86Builtin<"_Vector<2, double>(_Vector<8, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti32x8_mask : X86Builtin<"_Vector<8, int>(_Vector<16, int>, _Constant int, _Vector<8, int>, unsigned char)">;
def extracti64x2_512_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extracti32x4_mask : X86Builtin<"_Vector<4, int>(_Vector<16, int>, _Constant int, _Vector<4, int>, unsigned char)">;
def extracti64x4_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, long long int>, _Constant int, _Vector<4, long long int>, unsigned char)">;
}

let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf64x2_256_mask : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti64x2_256_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf32x4_256_mask : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int, _Vector<4, float>, unsigned char)">;
def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">;
}
Expand Down
95 changes: 95 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2858,6 +2858,82 @@ static bool interp__builtin_elementwise_triop(
return true;
}

static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
assert(Call->getNumArgs() == 2);

APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
uint64_t Index = ImmAPS.getZExtValue();

const Pointer &Src = S.Stk.pop<Pointer>();
if (!Src.getFieldDesc()->isPrimitiveArray())
return false;

const Pointer &Dst = S.Stk.peek<Pointer>();
if (!Dst.getFieldDesc()->isPrimitiveArray())
return false;

unsigned SrcElems = Src.getNumElems();
unsigned DstElems = Dst.getNumElems();

unsigned NumLanes = SrcElems / DstElems;
unsigned Lane = static_cast<unsigned>(Index % NumLanes);
unsigned ExtractPos = Lane * DstElems;

PrimType ElemT = Src.getFieldDesc()->getPrimType();

TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != DstElems; ++I) {
Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
}
});

Dst.initializeAllElements();
return true;
}

static bool interp__builtin_x86_extract_vector_masked(InterpState &S,
CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
assert(Call->getNumArgs() == 4);

APSInt MaskAPS = popToAPSInt(S, Call->getArg(3));
const Pointer &Merge = S.Stk.pop<Pointer>();
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
const Pointer &Src = S.Stk.pop<Pointer>();

if (!Src.getFieldDesc()->isPrimitiveArray() ||
!Merge.getFieldDesc()->isPrimitiveArray())
return false;

const Pointer &Dst = S.Stk.peek<Pointer>();
if (!Dst.getFieldDesc()->isPrimitiveArray())
return false;

unsigned SrcElems = Src.getNumElems();
unsigned DstElems = Dst.getNumElems();

unsigned NumLanes = SrcElems / DstElems;
unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
unsigned Base = Lane * DstElems;

PrimType ElemT = Src.getFieldDesc()->getPrimType();

TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != DstElems; ++I) {
if (MaskAPS[I])
Dst.elem<T>(I) = Src.elem<T>(Base + I);
else
Dst.elem<T>(I) = Merge.elem<T>(I);
}
});

Dst.initializeAllElements();
return true;
}

static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
Expand Down Expand Up @@ -3490,6 +3566,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
case X86::BI__builtin_ia32_extract128i256:
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256:
return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_extractf32x4_256_mask:
case X86::BI__builtin_ia32_extractf32x4_mask:
case X86::BI__builtin_ia32_extractf32x8_mask:
case X86::BI__builtin_ia32_extractf64x2_256_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extractf64x4_mask:
case X86::BI__builtin_ia32_extracti32x4_256_mask:
case X86::BI__builtin_ia32_extracti32x4_mask:
case X86::BI__builtin_ia32_extracti32x8_mask:
case X86::BI__builtin_ia32_extracti64x2_256_mask:
case X86::BI__builtin_ia32_extracti64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x4_mask:
return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID);

case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
Expand Down
68 changes: 68 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11774,6 +11774,74 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});

case X86::BI__builtin_ia32_extract128i256:
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256: {
APValue SourceVec, SourceImm;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceImm))
return false;

if (!SourceVec.isVector())
return false;

const auto *RetVT = E->getType()->castAs<VectorType>();
unsigned RetLen = RetVT->getNumElements();
unsigned SrcLen = SourceVec.getVectorLength();
unsigned Idx = SourceImm.getInt().getZExtValue() & 1;

SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);

for (unsigned I = 0; I < RetLen; I++)
ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I));

return Success(APValue(ResultElements.data(), RetLen), E);
}

case X86::BI__builtin_ia32_extracti32x4_256_mask:
case X86::BI__builtin_ia32_extractf32x4_256_mask:
case X86::BI__builtin_ia32_extracti32x4_mask:
case X86::BI__builtin_ia32_extractf32x4_mask:
case X86::BI__builtin_ia32_extracti32x8_mask:
case X86::BI__builtin_ia32_extractf32x8_mask:
case X86::BI__builtin_ia32_extracti64x2_256_mask:
case X86::BI__builtin_ia32_extractf64x2_256_mask:
case X86::BI__builtin_ia32_extracti64x2_512_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x4_mask:
case X86::BI__builtin_ia32_extractf64x4_mask: {
APValue SourceVec, MergeVec;
APSInt Imm, MaskImm;

if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
!EvaluateInteger(E->getArg(1), Imm, Info) ||
!EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
!EvaluateInteger(E->getArg(3), MaskImm, Info))
return false;

const auto *RetVT = E->getType()->castAs<VectorType>();
unsigned RetLen = RetVT->getNumElements();

if (!SourceVec.isVector() || !MergeVec.isVector())
return false;
unsigned SrcLen = SourceVec.getVectorLength();
unsigned Lanes = SrcLen / RetLen;
unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes);
unsigned Base = Lane * RetLen;

SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);
for (unsigned I = 0; I < RetLen; ++I) {
if (MaskImm[I])
ResultElements.push_back(SourceVec.getVectorElt(Base + I));
else
ResultElements.push_back(MergeVec.getVectorElt(I));
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
case clang::X86::BI__builtin_ia32_pavgb256:
Expand Down
34 changes: 16 additions & 18 deletions clang/lib/Headers/avx512dqintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1212,10 +1212,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8di)_mm512_setzero_si512());
}

#define _mm512_extractf32x8_ps(A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
(__v8sf)_mm256_undefined_ps(), \
(__mmask8)-1))
#define _mm512_extractf32x8_ps(A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
(__v8sf)_mm_setzero_pd(), \
(__mmask8) - 1))

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
Expand All @@ -1227,11 +1227,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U)))

#define _mm512_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
(int)(imm), \
(__v2df)_mm_undefined_pd(), \
(__mmask8)-1))
#define _mm512_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask( \
(__v8df)(__m512d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
(__mmask8) - 1))

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
Expand All @@ -1245,10 +1244,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U)))

#define _mm512_extracti32x8_epi32(A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
(__v8si)_mm256_undefined_si256(), \
(__mmask8)-1))
#define _mm512_extracti32x8_epi32(A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask( \
(__v16si)(__m512i)(A), (int)(imm), (__v8si)_mm256_setzero_si256(), \
(__mmask8) - 1))

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
Expand All @@ -1260,11 +1259,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8si)_mm256_setzero_si256(), \
(__mmask8)(U)))

#define _mm512_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
(int)(imm), \
(__v2di)_mm_undefined_si128(), \
(__mmask8)-1))
#define _mm512_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask( \
(__v8di)(__m512i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \
(__mmask8) - 1))

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
Expand Down
30 changes: 15 additions & 15 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -3164,10 +3164,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
(__v16si)_mm512_setzero_si512()))
/* Vector Extract */

#define _mm512_extractf64x4_pd(A, I) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
(__v4df)_mm256_undefined_pd(), \
(__mmask8)-1))
#define _mm512_extractf64x4_pd(A, I) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
(__v4df)_mm256_setzero_pd(), \
(__mmask8) - 1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
Expand All @@ -3179,10 +3179,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
(__v4df)_mm256_setzero_pd(), \
(__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
(__v4sf)_mm_undefined_ps(), \
(__mmask8)-1))
#define _mm512_extractf32x4_ps(A, I) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8) - 1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
Expand Down Expand Up @@ -7105,10 +7105,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
__builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}

#define _mm512_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
(__v4si)_mm_undefined_si128(), \
(__mmask8)-1))
#define _mm512_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask( \
(__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
(__mmask8) - 1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
Expand All @@ -7120,10 +7120,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
(__v4si)_mm_setzero_si128(), \
(__mmask8)(U)))

#define _mm512_extracti64x4_epi64(A, imm) \
#define _mm512_extracti64x4_epi64(A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
(__v4di)_mm256_undefined_si256(), \
(__mmask8)-1))
(__v4di)_mm256_setzero_si256(), \
(__mmask8) - 1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
Expand Down
Loading
Loading