Skip to content

Commit d4cafad

Browse files
0xzre authored and aadeshps-mcw committed
[clang][X86] Allow VALIGND/Q element-shift intrinsics in constexpr evaluation (llvm#168206)
Fixes llvm#167681
1 parent 74f35b5 commit d4cafad

File tree

5 files changed: 126 additions and 5 deletions.

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1045,24 +1045,24 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
10451045
def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">;
10461046
}
10471047

1048-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1048+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
10491049
def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
10501050
def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
10511051
}
10521052

1053-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1053+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
10541054
def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
10551055
}
10561056

1057-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1057+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
10581058
def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
10591059
}
10601060

1061-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1061+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
10621062
def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
10631063
}
10641064

1065-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1065+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
10661066
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
10671067
}
10681068

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4906,6 +4906,24 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
49064906
return std::pair<unsigned, int>{VecIdx, ElemIdx};
49074907
});
49084908

4909+
case X86::BI__builtin_ia32_alignd128:
4910+
case X86::BI__builtin_ia32_alignd256:
4911+
case X86::BI__builtin_ia32_alignd512:
4912+
case X86::BI__builtin_ia32_alignq128:
4913+
case X86::BI__builtin_ia32_alignq256:
4914+
case X86::BI__builtin_ia32_alignq512: {
4915+
unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements();
4916+
return interp__builtin_ia32_shuffle_generic(
4917+
S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) {
4918+
unsigned Imm = Shift & 0xFF;
4919+
unsigned EffectiveShift = Imm & (NumElems - 1);
4920+
unsigned SourcePos = DstIdx + EffectiveShift;
4921+
unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
4922+
unsigned ElemIdx = SourcePos & (NumElems - 1);
4923+
return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
4924+
});
4925+
}
4926+
49094927
default:
49104928
S.FFDiag(S.Current->getLocation(OpPC),
49114929
diag::note_invalid_subexpr_in_const_expr)

clang/lib/AST/ExprConstant.cpp

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13627,6 +13627,28 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1362713627
return false;
1362813628
return Success(R, E);
1362913629
}
13630+
case X86::BI__builtin_ia32_alignd128:
13631+
case X86::BI__builtin_ia32_alignd256:
13632+
case X86::BI__builtin_ia32_alignd512:
13633+
case X86::BI__builtin_ia32_alignq128:
13634+
case X86::BI__builtin_ia32_alignq256:
13635+
case X86::BI__builtin_ia32_alignq512: {
13636+
APValue R;
13637+
unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements();
13638+
if (!evalShuffleGeneric(Info, E, R,
13639+
[NumElems](unsigned DstIdx, unsigned Shift) {
13640+
unsigned Imm = Shift & 0xFF;
13641+
unsigned EffectiveShift = Imm & (NumElems - 1);
13642+
unsigned SourcePos = DstIdx + EffectiveShift;
13643+
unsigned VecIdx = SourcePos < NumElems ? 1 : 0;
13644+
unsigned ElemIdx = SourcePos & (NumElems - 1);
13645+
13646+
return std::pair<unsigned, int>{
13647+
VecIdx, static_cast<int>(ElemIdx)};
13648+
}))
13649+
return false;
13650+
return Success(R, E);
13651+
}
1363013652
case X86::BI__builtin_ia32_permvarsi256:
1363113653
case X86::BI__builtin_ia32_permvarsf256:
1363213654
case X86::BI__builtin_ia32_permvardf512:

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -519,6 +519,40 @@ __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b)
519519
return _mm512_maskz_alignr_epi64(u, a, b, 2);
520520
}
521521

522+
TEST_CONSTEXPR(match_v16si(_mm512_alignr_epi32(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
523+
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
524+
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
525+
8, 9, 10, 11, 12, 13, 14, 15}), 19),
526+
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 200, 300));
527+
TEST_CONSTEXPR(match_v16si(_mm512_mask_alignr_epi32(((__m512i)(__v16si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
528+
9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000}),
529+
0xA5A5,
530+
((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
531+
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
532+
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
533+
8, 9, 10, 11, 12, 13, 14, 15}), 19),
534+
3, 2000, 5, 4000, 5000, 8, 7000, 10,
535+
11, 10000, 13, 12000, 13000, 100, 15000, 300));
536+
TEST_CONSTEXPR(match_v16si(_mm512_maskz_alignr_epi32(0x0F0F,
537+
((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
538+
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
539+
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
540+
8, 9, 10, 11, 12, 13, 14, 15}), 19),
541+
3, 4, 5, 6, 0, 0, 0, 0, 11, 12, 13, 14, 0, 0, 0, 0));
542+
543+
TEST_CONSTEXPR(match_v8di(_mm512_alignr_epi64(((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
544+
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
545+
4, 5, 6, 7, 8, 10, 11, 12));
546+
TEST_CONSTEXPR(match_v8di(_mm512_mask_alignr_epi64(((__m512i)(__v8di){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
547+
0xA5,
548+
((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
549+
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
550+
4, 2000, 6, 4000, 5000, 10, 7000, 12));
551+
TEST_CONSTEXPR(match_v8di(_mm512_maskz_alignr_epi64(0x33,
552+
((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
553+
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
554+
4, 5, 0, 0, 8, 10, 0, 0));
555+
522556
__m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
523557
// CHECK-LABEL: test_mm512_fmadd_round_pd
524558
// CHECK: @llvm.x86.avx512.vfmadd.pd.512

clang/test/CodeGen/X86/avx512vl-builtins.c

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10642,6 +10642,53 @@ __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
1064210642
return _mm256_maskz_alignr_epi64(__U, __A, __B, 1);
1064310643
}
1064410644

10645+
TEST_CONSTEXPR(match_v4si(_mm_alignr_epi32(((__m128i)(__v4si){100, 200, 300, 400}),
10646+
((__m128i)(__v4si){10, 20, 30, 40}), 1),
10647+
20, 30, 40, 100));
10648+
TEST_CONSTEXPR(match_v4si(_mm_mask_alignr_epi32(((__m128i)(__v4si){1000, 2000, 3000, 4000}), 0x5,
10649+
((__m128i)(__v4si){100, 200, 300, 400}),
10650+
((__m128i)(__v4si){10, 20, 30, 40}), 1),
10651+
20, 2000, 40, 4000));
10652+
TEST_CONSTEXPR(match_v4si(_mm_maskz_alignr_epi32(0x3,
10653+
((__m128i)(__v4si){100, 200, 300, 400}),
10654+
((__m128i)(__v4si){10, 20, 30, 40}), 1),
10655+
20, 30, 0, 0));
10656+
10657+
TEST_CONSTEXPR(match_v8si(_mm256_alignr_epi32(((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
10658+
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
10659+
4, 5, 6, 7, 8, 100, 200, 300));
10660+
TEST_CONSTEXPR(match_v8si(_mm256_mask_alignr_epi32(((__m256i)(__v8si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
10661+
0xA5,
10662+
((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
10663+
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
10664+
4, 2000, 6, 4000, 5000, 100, 7000, 300));
10665+
TEST_CONSTEXPR(match_v8si(_mm256_maskz_alignr_epi32(0x33,
10666+
((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
10667+
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
10668+
4, 5, 0, 0, 8, 100, 0, 0));
10669+
10670+
TEST_CONSTEXPR(match_v2di(_mm_alignr_epi64(((__m128i)(__v2di){10, 11}), ((__m128i)(__v2di){1, 2}), 1), 2, 10));
10671+
TEST_CONSTEXPR(match_v2di(_mm_mask_alignr_epi64(((__m128i)(__v2di){1000, 2000}), 0x1,
10672+
((__m128i)(__v2di){10, 11}),
10673+
((__m128i)(__v2di){1, 2}), 1),
10674+
2, 2000));
10675+
TEST_CONSTEXPR(match_v2di(_mm_maskz_alignr_epi64(0x2,
10676+
((__m128i)(__v2di){10, 11}),
10677+
((__m128i)(__v2di){1, 2}), 1),
10678+
0, 10));
10679+
10680+
TEST_CONSTEXPR(match_v4di(_mm256_alignr_epi64(((__m256i)(__v4di){10, 11, 12, 13}),
10681+
((__m256i)(__v4di){1, 2, 3, 4}), 2),
10682+
3, 4, 10, 11));
10683+
TEST_CONSTEXPR(match_v4di(_mm256_mask_alignr_epi64(((__m256i)(__v4di){1000, 2000, 3000, 4000}), 0x5,
10684+
((__m256i)(__v4di){10, 11, 12, 13}),
10685+
((__m256i)(__v4di){1, 2, 3, 4}), 2),
10686+
3, 2000, 10, 4000));
10687+
TEST_CONSTEXPR(match_v4di(_mm256_maskz_alignr_epi64(0xA,
10688+
((__m256i)(__v4di){10, 11, 12, 13}),
10689+
((__m256i)(__v4di){1, 2, 3, 4}), 2),
10690+
0, 4, 0, 11));
10691+
1064510692
__m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
1064610693
// CHECK-LABEL: test_mm_mask_movehdup_ps
1064710694
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>

0 commit comments

Comments
 (0)