Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -2371,7 +2371,8 @@ let Features = "avx512vl",
def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def shuf_f32x4 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
def shuf_f64x2 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
def shuf_i32x4 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
Expand All @@ -2391,7 +2392,8 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
: X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def shuf_f32x4_256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
def shuf_f64x2_256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
def shuf_i32x4_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
Expand Down
33 changes: 33 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4847,6 +4847,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshr);

case X86::BI__builtin_ia32_shuf_f32x4_256:
case X86::BI__builtin_ia32_shuf_i32x4_256:
case X86::BI__builtin_ia32_shuf_f64x2_256:
case X86::BI__builtin_ia32_shuf_i64x2_256:
case X86::BI__builtin_ia32_shuf_f32x4:
case X86::BI__builtin_ia32_shuf_i32x4:
case X86::BI__builtin_ia32_shuf_f64x2:
case X86::BI__builtin_ia32_shuf_i64x2: {
// Destination and sources A, B all have the same type.
QualType VecQT = Call->getArg(0)->getType();
const auto *VecT = VecQT->castAs<VectorType>();
unsigned NumElems = VecT->getNumElements();
unsigned ElemBits = S.getASTContext().getTypeSize(VecT->getElementType());
unsigned LaneBits = 128u;
unsigned NumLanes = (NumElems * ElemBits) / LaneBits;
unsigned NumElemsPerLane = LaneBits / ElemBits;

return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call,
[NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) {
// DstIdx determines source. ShuffleMask selects lane in source.
unsigned BitsPerElem = NumLanes / 2;
unsigned IndexMask = (1u << BitsPerElem) - 1;
unsigned Lane = DstIdx / NumElemsPerLane;
unsigned SrcIdx = (Lane < NumLanes / 2) ? 0 : 1;
unsigned BitIdx = BitsPerElem * Lane;
unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask;
unsigned ElemInLane = DstIdx % NumElemsPerLane;
unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane;
return std::pair<unsigned, int>{SrcIdx, IdxToPick};
});
}

case X86::BI__builtin_ia32_insertf32x4_256:
case X86::BI__builtin_ia32_inserti32x4_256:
case X86::BI__builtin_ia32_insertf64x2_256:
Expand Down
50 changes: 50 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13517,6 +13517,56 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case X86::BI__builtin_ia32_shuf_f32x4_256:
case X86::BI__builtin_ia32_shuf_i32x4_256:
case X86::BI__builtin_ia32_shuf_f64x2_256:
case X86::BI__builtin_ia32_shuf_i64x2_256:
case X86::BI__builtin_ia32_shuf_f32x4:
case X86::BI__builtin_ia32_shuf_i32x4:
case X86::BI__builtin_ia32_shuf_f64x2:
case X86::BI__builtin_ia32_shuf_i64x2: {
APValue SourceA, SourceB;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceA) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceB))
return false;

APSInt Imm;
if (!EvaluateInteger(E->getArg(2), Imm, Info))
return false;

// Destination and sources A, B all have the same type.
unsigned NumElems = SourceA.getVectorLength();
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
QualType ElemQT = VT->getElementType();
unsigned ElemBits = Info.Ctx.getTypeSize(ElemQT);
unsigned LaneBits = 128u;
unsigned NumLanes = (NumElems * ElemBits) / LaneBits;
unsigned NumElemsPerLane = LaneBits / ElemBits;

unsigned DstLen = SourceA.getVectorLength();
SmallVector<APValue, 16> ResultElements;
ResultElements.reserve(DstLen);

APValue R;
if (!evalShuffleGeneric(
Info, E, R,
[NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask)
-> std::pair<unsigned, int> {
// DstIdx determines source. ShuffleMask selects lane in source.
unsigned BitsPerElem = NumLanes / 2;
unsigned IndexMask = (1u << BitsPerElem) - 1;
unsigned Lane = DstIdx / NumElemsPerLane;
unsigned SrcIdx = (Lane < NumLanes / 2) ? 0 : 1;
unsigned BitIdx = BitsPerElem * Lane;
unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask;
unsigned ElemInLane = DstIdx % NumElemsPerLane;
unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane;
return {SrcIdx, IdxToPick};
}))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_insertf32x4_256:
case X86::BI__builtin_ia32_inserti32x4_256:
case X86::BI__builtin_ia32_insertf64x2_256:
Expand Down
13 changes: 13 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -6892,80 +6892,93 @@ __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
return _mm512_shuffle_f32x4(__A, __B, 4);
}
TEST_CONSTEXPR(match_m512(_mm512_shuffle_f32x4(((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0}), 0b11111111), 13.0f, 14.0f, 15.0f, 16.0f, 13.0f, 14.0f, 15.0f, 16.0f, 130.0, 140.0, 150.0, 160.0, 130.0, 140.0, 150.0, 160.0));

__m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_f32x4
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, 4);
}
TEST_CONSTEXPR(match_m512(_mm512_mask_shuffle_f32x4(((__m512){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 900.0f, 1000.0f, 1100.0f, 1200.0f, 1300.0f, 1400.0f, 1500.0f, 1600.0f}), 0b1111111111111110, ((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0}), 0b11111111), 100.0f, 14.0f, 15.0f, 16.0f, 13.0f, 14.0f, 15.0f, 16.0f, 130.0, 140.0, 150.0, 160.0, 130.0, 140.0, 150.0, 160.0));

__m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_f32x4
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_shuffle_f32x4(__U, __A, __B, 4);
}
TEST_CONSTEXPR(match_m512(_mm512_maskz_shuffle_f32x4(0b1111111111110111, ((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0}), 0b11111111), 13.0f, 14.0f, 15.0f, 0.0f, 13.0f, 14.0f, 15.0f, 16.0f, 130.0, 140.0, 150.0, 160.0, 130.0, 140.0, 150.0, 160.0));

__m512d test_mm512_shuffle_f64x2(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_shuffle_f64x2
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
return _mm512_shuffle_f64x2(__A, __B, 4);
}
TEST_CONSTEXPR(match_m512d(_mm512_shuffle_f64x2(((__m512d){1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0}), ((__m512d){10.0,20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0b10101100), 1.0, 2.0, 7.0, 8.0, 50.0, 60.0, 50.0, 60.0));

__m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_f64x2
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, 4);
}
TEST_CONSTEXPR(match_m512d(_mm512_mask_shuffle_f64x2(((__m512d){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}), 0b11110000, ((__m512d){1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0}), ((__m512d){10.0,20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0b10101100), 100.0, 200.0, 300.0, 400.0, 50.0, 60.0, 50.0, 60.0));

__m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_f64x2
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_shuffle_f64x2(__U, __A, __B, 4);
}
TEST_CONSTEXPR(match_m512d(_mm512_maskz_shuffle_f64x2(0b11110100, ((__m512d){1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0}), ((__m512d){10.0,20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0b10101100), 0.0, 0.0, 7.0, 0.0, 50.0, 60.0, 50.0, 60.0));

__m512i test_mm512_shuffle_i32x4(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shuffle_i32x4
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
return _mm512_shuffle_i32x4(__A, __B, 4);
}
TEST_CONSTEXPR(match_v16si(_mm512_shuffle_i32x4(((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m512i)(__v16si){10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160}), 0), 1, 2, 3, 4, 1, 2, 3, 4, 10, 20, 30, 40, 10, 20, 30, 40));


__m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_i32x4
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, 4);
}
TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_i32x4(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), 0b1111111111111011, ((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m512i)(__v16si){10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160}), 0), 1, 2, 300, 4, 1, 2, 3, 4, 10, 20, 30, 40, 10, 20, 30, 40));

__m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_i32x4
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_shuffle_i32x4(__U, __A, __B, 4);
}
TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_i32x4(0b1011111111111111, ((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m512i)(__v16si){10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160}), 0), 1, 2, 3, 4, 1, 2, 3, 4, 10, 20, 30, 40, 10, 20, 0, 40));

__m512i test_mm512_shuffle_i64x2(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shuffle_i64x2
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
return _mm512_shuffle_i64x2(__A, __B, 4);
}
TEST_CONSTEXPR(match_m512i(_mm512_shuffle_i64x2(((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0b11000110), 5, 6, 3, 4, 10, 20, 70, 80));

__m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_i64x2
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, 4);
}
TEST_CONSTEXPR(match_m512i(_mm512_mask_shuffle_i64x2(((__m512i){100, 200, 300, 400, 500, 600, 700, 800}), 0b11111101, ((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0b11000110), 5, 200, 3, 4, 10, 20, 70, 80));

__m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_i64x2
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_shuffle_i64x2(__U, __A, __B, 4);
}
TEST_CONSTEXPR(match_m512i(_mm512_maskz_shuffle_i64x2(0b00111101, ((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0b11000110), 5, 0, 3, 4, 10, 20, 0, 0));

__m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) {
// CHECK-LABEL: test_mm512_shuffle_pd
Expand Down
Loading
Loading