Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
Expand Down Expand Up @@ -589,7 +589,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
Expand Down Expand Up @@ -1078,7 +1078,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf64x4_mask : X86Builtin<"_Vector<4, double>(_Vector<8, double>, _Constant int, _Vector<4, double>, unsigned char)">;
def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">;
}
Expand Down Expand Up @@ -2957,24 +2957,24 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def pmovqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">;
}

let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf32x8_mask : X86Builtin<"_Vector<8, float>(_Vector<16, float>, _Constant int, _Vector<8, float>, unsigned char)">;
def extractf64x2_512_mask : X86Builtin<"_Vector<2, double>(_Vector<8, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti32x8_mask : X86Builtin<"_Vector<8, int>(_Vector<16, int>, _Constant int, _Vector<8, int>, unsigned char)">;
def extracti64x2_512_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extracti32x4_mask : X86Builtin<"_Vector<4, int>(_Vector<16, int>, _Constant int, _Vector<4, int>, unsigned char)">;
def extracti64x4_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, long long int>, _Constant int, _Vector<4, long long int>, unsigned char)">;
}

let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf64x2_256_mask : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti64x2_256_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf32x4_256_mask : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int, _Vector<4, float>, unsigned char)">;
def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">;
}
Expand Down
105 changes: 105 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2858,6 +2858,92 @@ static bool interp__builtin_elementwise_triop(
return true;
}

static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
assert(Call->getNumArgs() == 2);

APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
uint64_t Index = ImmAPS.getZExtValue();

const Pointer &Src = S.Stk.pop<Pointer>();
if (!Src.getFieldDesc()->isPrimitiveArray())
return false;

const Pointer &Dst = S.Stk.peek<Pointer>();
if (!Dst.getFieldDesc()->isPrimitiveArray())
return false;

unsigned SrcElems = Src.getNumElems();
unsigned DstElems = Dst.getNumElems();

if (SrcElems == 0 || DstElems == 0 || (SrcElems % DstElems) != 0)
return false;

unsigned NumLanes = SrcElems / DstElems;
unsigned Lane = static_cast<unsigned>(Index % NumLanes);
unsigned ExtractPos = Lane * DstElems;

PrimType ElemT = Src.getFieldDesc()->getPrimType();
if (ElemT != Dst.getFieldDesc()->getPrimType())
return false;

TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != DstElems; ++I) {
Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
}
});

Dst.initializeAllElements();
return true;
}

static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
assert(Call->getNumArgs() == 4);

APSInt MaskAPS = popToAPSInt(S, Call->getArg(3));
const Pointer &Merge = S.Stk.pop<Pointer>();
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
const Pointer &Src = S.Stk.pop<Pointer>();

if (!Src.getFieldDesc()->isPrimitiveArray() || !Merge.getFieldDesc()->isPrimitiveArray())
return false;

const Pointer &Dst = S.Stk.peek<Pointer>();
if (!Dst.getFieldDesc()->isPrimitiveArray())
return false;

unsigned SrcElems = Src.getNumElems();
unsigned DstElems = Dst.getNumElems();
if (!SrcElems || !DstElems || (SrcElems % DstElems) != 0)
return false;

PrimType ElemT = Src.getFieldDesc()->getPrimType();
if (ElemT != Dst.getFieldDesc()->getPrimType() ||
ElemT != Merge.getFieldDesc()->getPrimType())
return false;

unsigned NumLanes = SrcElems / DstElems;
unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
unsigned Base = Lane * DstElems;

uint64_t Mask = MaskAPS.getZExtValue();

TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != DstElems; ++I) {
if ((Mask >> I) & 1)
Dst.elem<T>(I) = Src.elem<T>(Base + I);
else
Dst.elem<T>(I) = Merge.elem<T>(I);
}
});

Dst.initializeAllElements();
return true;
}

static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
Expand Down Expand Up @@ -3490,6 +3576,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
case X86::BI__builtin_ia32_extract128i256:
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256:
return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_extractf32x4_256_mask:
case X86::BI__builtin_ia32_extractf32x4_mask:
case X86::BI__builtin_ia32_extractf32x8_mask:
case X86::BI__builtin_ia32_extractf64x2_256_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extractf64x4_mask:
case X86::BI__builtin_ia32_extracti32x4_256_mask:
case X86::BI__builtin_ia32_extracti32x4_mask:
case X86::BI__builtin_ia32_extracti32x8_mask:
case X86::BI__builtin_ia32_extracti64x2_256_mask:
case X86::BI__builtin_ia32_extracti64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x4_mask:
return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID);

case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
Expand Down
75 changes: 75 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11773,6 +11773,81 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});

case X86::BI__builtin_ia32_extract128i256:
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256: {
APValue SourceVec, SourceImm;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceImm))
return false;

if (!SourceVec.isVector())
return false;

const auto *RetVT = E->getType()->castAs<VectorType>();
if (!RetVT) return false;

unsigned RetLen = RetVT->getNumElements();
unsigned SrcLen = SourceVec.getVectorLength();
if (SrcLen != RetLen * 2)
return false;

unsigned Idx = SourceImm.getInt().getZExtValue() & 1;

SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);

for (unsigned I = 0; I < RetLen; I++)
ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I));

return Success(APValue(ResultElements.data(), RetLen), E);
}

case X86::BI__builtin_ia32_extracti32x4_256_mask:
case X86::BI__builtin_ia32_extractf32x4_256_mask:
case X86::BI__builtin_ia32_extracti32x4_mask:
case X86::BI__builtin_ia32_extractf32x4_mask:
case X86::BI__builtin_ia32_extracti32x8_mask:
case X86::BI__builtin_ia32_extractf32x8_mask:
case X86::BI__builtin_ia32_extracti64x2_256_mask:
case X86::BI__builtin_ia32_extractf64x2_256_mask:
case X86::BI__builtin_ia32_extracti64x2_512_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x4_mask:
case X86::BI__builtin_ia32_extractf64x4_mask:{
APValue SourceVec, MergeVec;
APSInt Imm, MaskImm;

if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
!EvaluateInteger(E->getArg(1), Imm, Info) ||
!EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
!EvaluateInteger(E->getArg(3), MaskImm, Info))
return false;

const auto *RetVT = E->getType()->castAs<VectorType>();
unsigned RetLen = RetVT->getNumElements();

if (!SourceVec.isVector() || !MergeVec.isVector()) return false;
unsigned SrcLen = SourceVec.getVectorLength();
if (!SrcLen || !RetLen || (SrcLen % RetLen) != 0) return false;

unsigned Lanes = SrcLen / RetLen;
unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes);
unsigned Base = Lane * RetLen;
uint64_t Mask = MaskImm.getZExtValue();

SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);
for (unsigned I = 0; I < RetLen; ++I) {
if ((Mask >> I) & 1)
ResultElements.push_back(SourceVec.getVectorElt(Base + I));
else
ResultElements.push_back(MergeVec.getVectorElt(I));
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Headers/avx512dqintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1214,7 +1214,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)

#define _mm512_extractf32x8_ps(A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
(__v8sf)_mm256_undefined_ps(), \
(__v8sf)_mm_setzero_pd(), \
(__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
Expand All @@ -1230,7 +1230,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
(int)(imm), \
(__v2df)_mm_undefined_pd(), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
Expand All @@ -1247,7 +1247,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)

#define _mm512_extracti32x8_epi32(A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
(__v8si)_mm256_undefined_si256(), \
(__v8si)_mm256_setzero_si256(), \
(__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
Expand All @@ -1263,7 +1263,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
(int)(imm), \
(__v2di)_mm_undefined_si128(), \
(__v2di)_mm_setzero_si128(), \
(__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -3166,7 +3166,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,

#define _mm512_extractf64x4_pd(A, I) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
(__v4df)_mm256_undefined_pd(), \
(__v4df)_mm256_setzero_pd(), \
(__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
Expand All @@ -3181,7 +3181,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,

#define _mm512_extractf32x4_ps(A, I) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
(__v4sf)_mm_undefined_ps(), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
Expand Down Expand Up @@ -7107,7 +7107,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)

#define _mm512_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
(__v4si)_mm_undefined_si128(), \
(__v4si)_mm_setzero_si128(), \
(__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
Expand All @@ -7122,7 +7122,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)

#define _mm512_extracti64x4_epi64(A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
(__v4di)_mm256_undefined_si256(), \
(__v4di)_mm256_setzero_si256(), \
(__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Headers/avx512vldqintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
#define _mm256_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
(int)(imm), \
(__v2df)_mm_undefined_pd(), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
Expand All @@ -1093,7 +1093,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
#define _mm256_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
(int)(imm), \
(__v2di)_mm_undefined_si128(), \
(__v2di)_mm_setzero_si128(), \
(__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Headers/avx512vlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -7609,7 +7609,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
#define _mm256_extractf32x4_ps(A, imm) \
((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
(int)(imm), \
(__v4sf)_mm_undefined_ps(), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1))

#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
Expand All @@ -7627,7 +7627,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
#define _mm256_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
(int)(imm), \
(__v4si)_mm_undefined_si128(), \
(__v4si)_mm_setzero_si128(), \
(__mmask8)-1))

#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
Expand Down
8 changes: 7 additions & 1 deletion clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1070,19 +1070,25 @@ __m128d test_mm256_extractf128_pd(__m256d A) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
return _mm256_extractf128_pd(A, 1);
}
TEST_CONSTEXPR(match_m128d(_mm256_extractf128_pd(((__m256d){0.0, 1.0, 2.0, 3.0}), 1),
2.0, 3.0));

__m128 test_mm256_extractf128_ps(__m256 A) {
// CHECK-LABEL: test_mm256_extractf128_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf128_ps(A, 1);
}
TEST_CONSTEXPR(match_m128(_mm256_extractf128_ps(((__m256){0,1,2,3,4,5,6,7}), 1),
4.0f, 5.0f, 6.0f, 7.0f));

__m128i test_mm256_extractf128_si256(__m256i A) {
// CHECK-LABEL: test_mm256_extractf128_si256
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf128_si256(A, 1);
}

TEST_CONSTEXPR(match_m128i(_mm256_extractf128_si256(((__m256i){0ULL, 1ULL, 2ULL, 3ULL}), 1),
2ULL, 3ULL));

__m256d test_mm256_floor_pd(__m256d x) {
// CHECK-LABEL: test_mm256_floor_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1)
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ __m128i test0_mm256_extracti128_si256_0(__m256i a) {
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
return _mm256_extracti128_si256(a, 0);
}
TEST_CONSTEXPR(match_m128i(_mm256_extracti128_si256(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), 0),1ULL, 2ULL));

__m128i test1_mm256_extracti128_si256_1(__m256i a) {
// CHECK-LABEL: test1_mm256_extracti128_si256
Expand Down
Loading
Loading