Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1072,24 +1072,24 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
}

Expand Down
21 changes: 21 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4774,6 +4774,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::pair<unsigned, int>{VecIdx, ElemIdx};
});

case X86::BI__builtin_ia32_alignd128:
case X86::BI__builtin_ia32_alignd256:
case X86::BI__builtin_ia32_alignd512:
case X86::BI__builtin_ia32_alignq128:
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
  // Lane count of the call's result vector type. NumElts - 1 is used as a bit
  // mask below, which presumably assumes a power-of-two lane count.
  const unsigned NumElts =
      Call->getType()->castAs<VectorType>()->getNumElements();

  // Given a destination lane and the immediate, yields the
  // {source vector index, source lane} pair handed back to the generic
  // shuffle helper.
  auto PickSourceLane = [NumElts](unsigned DstIdx, unsigned Shift) {
    // Keep the low 8 bits of the immediate, then reduce modulo the lane count.
    const unsigned LaneMask = NumElts - 1;
    const unsigned Offset = (Shift & 0xFF) & LaneMask;
    const unsigned Pos = DstIdx + Offset;

    // Positions that stay below NumElts are tagged with vector index 1 and
    // used as-is.
    if (Pos < NumElts)
      return std::pair<unsigned, int>{1u, static_cast<int>(Pos)};

    // Positions that run past the end are tagged with vector index 0 and
    // rebased by NumElts.
    return std::pair<unsigned, int>{0u, static_cast<int>(Pos - NumElts)};
  };

  return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, PickSourceLane);
}

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
24 changes: 24 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13551,6 +13551,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_alignd128:
case X86::BI__builtin_ia32_alignd256:
case X86::BI__builtin_ia32_alignd512:
case X86::BI__builtin_ia32_alignq128:
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
APValue R;
const unsigned NumElts =
E->getType()->castAs<VectorType>()->getNumElements();
if (!evalShuffleGeneric(
Info, E, R, [NumElts](unsigned DstIdx, unsigned Shift) {
unsigned Imm = Shift & 0xFF;
unsigned EffectiveShift = Imm & (NumElts - 1);
unsigned SourcePos = DstIdx + EffectiveShift;
unsigned VecIdx = SourcePos < NumElts ? 1 : 0;
unsigned ElemIdx =
SourcePos < NumElts ? SourcePos : SourcePos - NumElts;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SourcePos < NumElts ? SourcePos : SourcePos - NumElts;
unsigned ElemIdx = SourcePos & (NumElts - 1);


return std::pair<unsigned, int>{VecIdx,
static_cast<int>(ElemIdx)};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_permvarsi256:
case X86::BI__builtin_ia32_permvarsf256:
case X86::BI__builtin_ia32_permvardf512:
Expand Down
53 changes: 53 additions & 0 deletions clang/test/AST/ByteCode/x86-valign-builtins.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=expected -fexperimental-new-constant-interpreter %s
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=ref %s

// expected-no-diagnostics
// ref-no-diagnostics

#define __MM_MALLOC_H
#include <immintrin.h>

using v4si = int __attribute__((vector_size(16)));
using v8si = int __attribute__((vector_size(32)));
using v16si = int __attribute__((vector_size(64)));
using v4di = long long __attribute__((vector_size(32)));

// Builds the 128-bit / 32-bit-lane fixture: _mm_alignr_epi32 with an immediate
// of 1. The static_asserts in this file expect {20, 30, 40, 100}, i.e. three
// lanes from the second operand followed by the first lane of the first.
constexpr v4si test_alignr_epi32_128() {
  v4si Upper = {100, 200, 300, 400};
  v4si Lower = {10, 20, 30, 40};
  const auto Aligned = _mm_alignr_epi32((__m128i)Upper, (__m128i)Lower, 1);
  return (v4si)Aligned;
}

// Builds the 256-bit / 32-bit-lane fixture: _mm256_alignr_epi32 with an
// immediate of 3. The static_asserts in this file expect
// {4, 5, 6, 7, 8, 100, 200, 300}.
constexpr v8si test_alignr_epi32_256() {
  v8si Upper = {100, 200, 300, 400, 500, 600, 700, 800};
  v8si Lower = {1, 2, 3, 4, 5, 6, 7, 8};
  const auto Aligned = _mm256_alignr_epi32((__m256i)Upper, (__m256i)Lower, 3);
  return (v8si)Aligned;
}

// Builds the 512-bit / 32-bit-lane fixture with an immediate (19) larger than
// the 16-lane vector width. The static_asserts in this file expect the result
// to start at lane 3 of the second operand, i.e. the immediate wraps.
constexpr v16si test_alignr_epi32_512_wrap() {
  v16si Upper = {100, 200,  300,  400,  500,  600,  700,  800,
                 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600};
  v16si Lower = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  const auto Aligned = _mm512_alignr_epi32((__m512i)Upper, (__m512i)Lower, 19);
  return (v16si)Aligned;
}

// Builds the 256-bit / 64-bit-lane fixture: _mm256_alignr_epi64 with an
// immediate of 2. The static_asserts in this file expect {3, 4, 10, 11}.
constexpr v4di test_alignr_epi64_256() {
  v4di Upper = {10, 11, 12, 13};
  v4di Lower = {1, 2, 3, 4};
  const auto Aligned = _mm256_alignr_epi64((__m256i)Upper, (__m256i)Lower, 2);
  return (v4di)Aligned;
}

// Compile-time checks of the fixtures above. Each result is evaluated as a
// constant expression and every lane is compared against the expected value.

// 128-bit, immediate 1: lanes 1..3 of B, then lane 0 of A.
constexpr v4si R128 = test_alignr_epi32_128();
static_assert(R128[0] == 20 && R128[1] == 30 && R128[2] == 40 && R128[3] == 100);

// 256-bit, immediate 3: lanes 3..7 of B, then lanes 0..2 of A.
constexpr v8si R256 = test_alignr_epi32_256();
static_assert(R256[0] == 4 && R256[1] == 5 && R256[2] == 6 && R256[3] == 7);
static_assert(R256[4] == 8 && R256[5] == 100 && R256[6] == 200 && R256[7] == 300);

// 512-bit, immediate 19: expected to behave like a shift by 3 lanes, giving
// lanes 3..15 of B followed by lanes 0..2 of A. Lanes 4..7 are checked too so
// that every lane of the result is covered.
constexpr v16si R512 = test_alignr_epi32_512_wrap();
static_assert(R512[0] == 3 && R512[1] == 4 && R512[2] == 5 && R512[3] == 6);
static_assert(R512[4] == 7 && R512[5] == 8 && R512[6] == 9 && R512[7] == 10);
static_assert(R512[8] == 11 && R512[9] == 12 && R512[10] == 13 && R512[11] == 14);
static_assert(R512[12] == 15 && R512[13] == 100 && R512[14] == 200 && R512[15] == 300);

// 256-bit with 64-bit lanes, immediate 2: lanes 2..3 of B, then lanes 0..1 of A.
constexpr v4di R64 = test_alignr_epi64_256();
static_assert(R64[0] == 3 && R64[1] == 4 && R64[2] == 10 && R64[3] == 11);
Loading