Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1072,24 +1072,24 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
}

Expand Down
20 changes: 20 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4774,6 +4774,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::pair<unsigned, int>{VecIdx, ElemIdx};
});

case X86::BI__builtin_ia32_alignd128:
case X86::BI__builtin_ia32_alignd256:
case X86::BI__builtin_ia32_alignd512:
case X86::BI__builtin_ia32_alignq128:
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
unsigned NumElems =
Call->getType()->castAs<VectorType>()->getNumElements();
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) {
unsigned Imm = Shift & 0xFF;
unsigned EffectiveShift = Imm & (NumElems - 1);
unsigned SourcePos = DstIdx + EffectiveShift;
unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
unsigned ElemIdx =
SourcePos < NumElems ? SourcePos : SourcePos - NumElems;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SourcePos < NumElems ? SourcePos : SourcePos - NumElems;
unsigned ElemIdx = SourcePos & (NumElems - 1);

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh nice, fixed

return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
});
}

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
24 changes: 24 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13551,6 +13551,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_alignd128:
case X86::BI__builtin_ia32_alignd256:
case X86::BI__builtin_ia32_alignd512:
case X86::BI__builtin_ia32_alignq128:
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
APValue R;
const unsigned NumElts =
E->getType()->castAs<VectorType>()->getNumElements();
if (!evalShuffleGeneric(
Info, E, R, [NumElts](unsigned DstIdx, unsigned Shift) {
unsigned Imm = Shift & 0xFF;
unsigned EffectiveShift = Imm & (NumElts - 1);
unsigned SourcePos = DstIdx + EffectiveShift;
unsigned VecIdx = SourcePos < NumElts ? 1 : 0;
unsigned ElemIdx =
SourcePos < NumElts ? SourcePos : SourcePos - NumElts;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SourcePos < NumElts ? SourcePos : SourcePos - NumElts;
unsigned ElemIdx = SourcePos & (NumElts - 1);


return std::pair<unsigned, int>{VecIdx,
static_cast<int>(ElemIdx)};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_permvarsi256:
case X86::BI__builtin_ia32_permvarsf256:
case X86::BI__builtin_ia32_permvardf512:
Expand Down
34 changes: 34 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,40 @@ __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b)
return _mm512_maskz_alignr_epi64(u, a, b, 2);
}

// Constant-evaluation tests for the 512-bit alignr intrinsics on 32-bit lanes.
// The immediate 19 is larger than the 16-lane vector; the expected values
// correspond to a shift of 3 (19 & 15): lanes 3..15 of the second operand
// followed by lanes 0..2 of the first operand.
TEST_CONSTEXPR(match_v16si(_mm512_alignr_epi32(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15}), 19),
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 200, 300));
// Merge-masked form, mask 0xA5A5: lanes whose mask bit is set
// (0, 2, 5, 7, 8, 10, 13, 15) hold the aligned result; every other lane keeps
// the matching element of the first (pass-through) vector.
TEST_CONSTEXPR(match_v16si(_mm512_mask_alignr_epi32(((__m512i)(__v16si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000}),
0xA5A5,
((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15}), 19),
3, 2000, 5, 4000, 5000, 8, 7000, 10,
11, 10000, 13, 12000, 13000, 100, 15000, 300));
// Zero-masked form, mask 0x0F0F: lanes 0..3 and 8..11 hold the aligned
// result; all remaining lanes are zero.
TEST_CONSTEXPR(match_v16si(_mm512_maskz_alignr_epi32(0x0F0F,
((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15}), 19),
3, 4, 5, 6, 0, 0, 0, 0, 11, 12, 13, 14, 0, 0, 0, 0));

// Constant-evaluation tests for the 512-bit alignr intrinsics on 64-bit lanes.
// The immediate 11 is larger than the 8-lane vector; the expected values
// correspond to a shift of 3 (11 & 7): lanes 3..7 of the second operand
// followed by lanes 0..2 of the first operand.
TEST_CONSTEXPR(match_v8di(_mm512_alignr_epi64(((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
4, 5, 6, 7, 8, 10, 11, 12));
// Merge-masked form, mask 0xA5: lanes 0, 2, 5 and 7 hold the aligned result;
// the other lanes keep the matching element of the pass-through vector.
TEST_CONSTEXPR(match_v8di(_mm512_mask_alignr_epi64(((__m512i)(__v8di){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
0xA5,
((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
4, 2000, 6, 4000, 5000, 10, 7000, 12));
// Zero-masked form, mask 0x33: lanes 0, 1, 4 and 5 hold the aligned result;
// the remaining lanes are zero.
TEST_CONSTEXPR(match_v8di(_mm512_maskz_alignr_epi64(0x33,
((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
4, 5, 0, 0, 8, 10, 0, 0));

__m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_fmadd_round_pd
// CHECK: @llvm.x86.avx512.vfmadd.pd.512
Expand Down
47 changes: 47 additions & 0 deletions clang/test/CodeGen/X86/avx512vl-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -10518,6 +10518,53 @@ __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
return _mm256_maskz_alignr_epi64(__U, __A, __B, 1);
}

// Constant-evaluation tests for the 128-bit alignr intrinsics on 32-bit lanes.
// With a shift of 1 the expected result is lanes 1..3 of the second operand
// followed by lane 0 of the first operand.
TEST_CONSTEXPR(match_v4si(_mm_alignr_epi32(((__m128i)(__v4si){100, 200, 300, 400}),
((__m128i)(__v4si){10, 20, 30, 40}), 1),
20, 30, 40, 100));
// Merge-masked form, mask 0x5: lanes 0 and 2 hold the aligned result; lanes 1
// and 3 keep the matching element of the pass-through vector.
TEST_CONSTEXPR(match_v4si(_mm_mask_alignr_epi32(((__m128i)(__v4si){1000, 2000, 3000, 4000}), 0x5,
((__m128i)(__v4si){100, 200, 300, 400}),
((__m128i)(__v4si){10, 20, 30, 40}), 1),
20, 2000, 40, 4000));
// Zero-masked form, mask 0x3: lanes 0 and 1 hold the aligned result; lanes 2
// and 3 are zero.
TEST_CONSTEXPR(match_v4si(_mm_maskz_alignr_epi32(0x3,
((__m128i)(__v4si){100, 200, 300, 400}),
((__m128i)(__v4si){10, 20, 30, 40}), 1),
20, 30, 0, 0));

// Constant-evaluation tests for the 256-bit alignr intrinsics on 32-bit lanes.
// With a shift of 3 the expected result is lanes 3..7 of the second operand
// followed by lanes 0..2 of the first operand.
TEST_CONSTEXPR(match_v8si(_mm256_alignr_epi32(((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
4, 5, 6, 7, 8, 100, 200, 300));
// Merge-masked form, mask 0xA5: lanes 0, 2, 5 and 7 hold the aligned result;
// the other lanes keep the matching element of the pass-through vector.
TEST_CONSTEXPR(match_v8si(_mm256_mask_alignr_epi32(((__m256i)(__v8si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
0xA5,
((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
4, 2000, 6, 4000, 5000, 100, 7000, 300));
// Zero-masked form, mask 0x33: lanes 0, 1, 4 and 5 hold the aligned result;
// the remaining lanes are zero.
TEST_CONSTEXPR(match_v8si(_mm256_maskz_alignr_epi32(0x33,
((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
4, 5, 0, 0, 8, 100, 0, 0));

// Constant-evaluation tests for the 128-bit alignr intrinsics on 64-bit lanes.
// With a shift of 1 the expected result is lane 1 of the second operand
// followed by lane 0 of the first operand.
TEST_CONSTEXPR(match_v2di(_mm_alignr_epi64(((__m128i)(__v2di){10, 11}), ((__m128i)(__v2di){1, 2}), 1), 2, 10));
// Merge-masked form, mask 0x1: lane 0 holds the aligned result; lane 1 keeps
// the matching element of the pass-through vector.
TEST_CONSTEXPR(match_v2di(_mm_mask_alignr_epi64(((__m128i)(__v2di){1000, 2000}), 0x1,
((__m128i)(__v2di){10, 11}),
((__m128i)(__v2di){1, 2}), 1),
2, 2000));
// Zero-masked form, mask 0x2: lane 1 holds the aligned result; lane 0 is zero.
TEST_CONSTEXPR(match_v2di(_mm_maskz_alignr_epi64(0x2,
((__m128i)(__v2di){10, 11}),
((__m128i)(__v2di){1, 2}), 1),
0, 10));

// Constant-evaluation tests for the 256-bit alignr intrinsics on 64-bit lanes.
// With a shift of 2 the expected result is lanes 2..3 of the second operand
// followed by lanes 0..1 of the first operand.
TEST_CONSTEXPR(match_v4di(_mm256_alignr_epi64(((__m256i)(__v4di){10, 11, 12, 13}),
((__m256i)(__v4di){1, 2, 3, 4}), 2),
3, 4, 10, 11));
// Merge-masked form, mask 0x5: lanes 0 and 2 hold the aligned result; lanes 1
// and 3 keep the matching element of the pass-through vector.
TEST_CONSTEXPR(match_v4di(_mm256_mask_alignr_epi64(((__m256i)(__v4di){1000, 2000, 3000, 4000}), 0x5,
((__m256i)(__v4di){10, 11, 12, 13}),
((__m256i)(__v4di){1, 2, 3, 4}), 2),
3, 2000, 10, 4000));
// Zero-masked form, mask 0xA: lanes 1 and 3 hold the aligned result; lanes 0
// and 2 are zero.
TEST_CONSTEXPR(match_v4di(_mm256_maskz_alignr_epi64(0xA,
((__m256i)(__v4di){10, 11, 12, 13}),
((__m256i)(__v4di){1, 2, 3, 4}), 2),
0, 4, 0, 11));

__m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
// CHECK-LABEL: test_mm_mask_movehdup_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
Expand Down
Loading