Skip to content

Commit aa491fc

Browse files
authored
[X86] Add constexpr handling for XOP/AVX512 rotate by immediate intrinsics (#156047)
1 parent e68d66a commit aa491fc

File tree

6 files changed

+130
-13
lines changed

6 files changed

+130
-13
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -878,10 +878,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
878878
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
879879
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
880880
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
881-
def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
882-
def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
883-
def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
884-
def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
885881
def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
886882
def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
887883
def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
@@ -906,6 +902,13 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
906902
def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
907903
}
908904

905+
let Features = "xop", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
906+
def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
907+
def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
908+
def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
909+
def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
910+
}
911+
909912
let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
910913
def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">;
911914
def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">;
@@ -1989,21 +1992,21 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW
19891992
def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
19901993
}
19911994

1992-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1995+
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
19931996
def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
19941997
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
19951998
def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
19961999
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
19972000
}
19982001

1999-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2002+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
20002003
def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
20012004
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
20022005
def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
20032006
def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
20042007
}
20052008

2006-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2009+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
20072010
def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
20082011
def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
20092012
def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3325,6 +3325,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33253325
return LHS.lshr(RHS.getZExtValue());
33263326
});
33273327

3328+
case clang::X86::BI__builtin_ia32_vprotbi:
3329+
case clang::X86::BI__builtin_ia32_vprotdi:
3330+
case clang::X86::BI__builtin_ia32_vprotqi:
3331+
case clang::X86::BI__builtin_ia32_vprotwi:
3332+
case clang::X86::BI__builtin_ia32_prold128:
3333+
case clang::X86::BI__builtin_ia32_prold256:
3334+
case clang::X86::BI__builtin_ia32_prold512:
3335+
case clang::X86::BI__builtin_ia32_prolq128:
3336+
case clang::X86::BI__builtin_ia32_prolq256:
3337+
case clang::X86::BI__builtin_ia32_prolq512:
3338+
return interp__builtin_elementwise_int_binop(
3339+
S, OpPC, Call, BuiltinID,
3340+
[](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(RHS); });
3341+
3342+
case clang::X86::BI__builtin_ia32_prord128:
3343+
case clang::X86::BI__builtin_ia32_prord256:
3344+
case clang::X86::BI__builtin_ia32_prord512:
3345+
case clang::X86::BI__builtin_ia32_prorq128:
3346+
case clang::X86::BI__builtin_ia32_prorq256:
3347+
case clang::X86::BI__builtin_ia32_prorq512:
3348+
return interp__builtin_elementwise_int_binop(
3349+
S, OpPC, Call, BuiltinID,
3350+
[](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(RHS); });
3351+
33283352
case Builtin::BI__builtin_elementwise_max:
33293353
case Builtin::BI__builtin_elementwise_min:
33303354
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);

clang/lib/AST/ExprConstant.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11835,6 +11835,62 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1183511835

1183611836
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1183711837
}
11838+
case clang::X86::BI__builtin_ia32_vprotbi:
11839+
case clang::X86::BI__builtin_ia32_vprotdi:
11840+
case clang::X86::BI__builtin_ia32_vprotqi:
11841+
case clang::X86::BI__builtin_ia32_vprotwi:
11842+
case clang::X86::BI__builtin_ia32_prold128:
11843+
case clang::X86::BI__builtin_ia32_prold256:
11844+
case clang::X86::BI__builtin_ia32_prold512:
11845+
case clang::X86::BI__builtin_ia32_prolq128:
11846+
case clang::X86::BI__builtin_ia32_prolq256:
11847+
case clang::X86::BI__builtin_ia32_prolq512: {
11848+
APValue SourceLHS, SourceRHS;
11849+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
11850+
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
11851+
return false;
11852+
11853+
QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
11854+
bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
11855+
unsigned SourceLen = SourceLHS.getVectorLength();
11856+
SmallVector<APValue, 4> ResultElements;
11857+
ResultElements.reserve(SourceLen);
11858+
11859+
APSInt RHS = SourceRHS.getInt();
11860+
11861+
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
11862+
const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
11863+
ResultElements.push_back(APValue(APSInt(LHS.rotl(RHS), DestUnsigned)));
11864+
}
11865+
11866+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11867+
}
11868+
case clang::X86::BI__builtin_ia32_prord128:
11869+
case clang::X86::BI__builtin_ia32_prord256:
11870+
case clang::X86::BI__builtin_ia32_prord512:
11871+
case clang::X86::BI__builtin_ia32_prorq128:
11872+
case clang::X86::BI__builtin_ia32_prorq256:
11873+
case clang::X86::BI__builtin_ia32_prorq512: {
11874+
APValue SourceLHS, SourceRHS;
11875+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
11876+
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
11877+
return false;
11878+
11879+
QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
11880+
bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
11881+
unsigned SourceLen = SourceLHS.getVectorLength();
11882+
SmallVector<APValue, 4> ResultElements;
11883+
ResultElements.reserve(SourceLen);
11884+
11885+
APSInt RHS = SourceRHS.getInt();
11886+
11887+
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
11888+
const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
11889+
ResultElements.push_back(APValue(APSInt(LHS.rotr(RHS), DestUnsigned)));
11890+
}
11891+
11892+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11893+
}
1183811894
case Builtin::BI__builtin_elementwise_max:
1183911895
case Builtin::BI__builtin_elementwise_min: {
1184011896
APValue SourceLHS, SourceRHS;

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4004,7 +4004,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) {
40044004
// CHECK: zext <8 x i32> %{{.*}} to <8 x i64>
40054005
return _mm512_cvtepu32_epi64(__X);
40064006
}
4007-
40084007
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292));
40094008

40104009
__m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
@@ -4026,7 +4025,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) {
40264025
// CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
40274026
return _mm512_cvtepu16_epi32(__A);
40284027
}
4029-
40304028
TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524));
40314029

40324030
__m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
@@ -4048,7 +4046,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) {
40484046
// CHECK: zext <8 x i16> %{{.*}} to <8 x i64>
40494047
return _mm512_cvtepu16_epi64(__A);
40504048
}
4051-
40524049
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
40534050

40544051
__m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
@@ -4065,46 +4062,51 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
40654062
return _mm512_maskz_cvtepu16_epi64(__U, __A);
40664063
}
40674064

4068-
40694065
__m512i test_mm512_rol_epi32(__m512i __A) {
40704066
// CHECK-LABEL: test_mm512_rol_epi32
40714067
// CHECK: @llvm.fshl.v16i32
40724068
return _mm512_rol_epi32(__A, 5);
40734069
}
4070+
TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481));
40744071

40754072
__m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
40764073
// CHECK-LABEL: test_mm512_mask_rol_epi32
40774074
// CHECK: @llvm.fshl.v16i32
40784075
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
40794076
return _mm512_mask_rol_epi32(__W, __U, __A, 5);
40804077
}
4078+
TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481));
40814079

40824080
__m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
40834081
// CHECK-LABEL: test_mm512_maskz_rol_epi32
40844082
// CHECK: @llvm.fshl.v16i32
40854083
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
40864084
return _mm512_maskz_rol_epi32(__U, __A, 5);
40874085
}
4086+
TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0));
40884087

40894088
__m512i test_mm512_rol_epi64(__m512i __A) {
40904089
// CHECK-LABEL: test_mm512_rol_epi64
40914090
// CHECK: @llvm.fshl.v8i64
40924091
return _mm512_rol_epi64(__A, 5);
40934092
}
4093+
TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
40944094

40954095
__m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
40964096
// CHECK-LABEL: test_mm512_mask_rol_epi64
40974097
// CHECK: @llvm.fshl.v8i64
40984098
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
40994099
return _mm512_mask_rol_epi64(__W, __U, __A, 5);
41004100
}
4101+
TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
41014102

41024103
__m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
41034104
// CHECK-LABEL: test_mm512_maskz_rol_epi64
41044105
// CHECK: @llvm.fshl.v8i64
41054106
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
41064107
return _mm512_maskz_rol_epi64(__U, __A, 5);
41074108
}
4109+
TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
41084110

41094111
__m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) {
41104112
// CHECK-LABEL: test_mm512_rolv_epi32
@@ -4151,41 +4153,46 @@ __m512i test_mm512_ror_epi32(__m512i __A) {
41514153
// CHECK: @llvm.fshr.v16i32
41524154
return _mm512_ror_epi32(__A, 5);
41534155
}
4156+
TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 1744830464, -1744830465, 2013265920, -2013265921));
41544157

41554158
__m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
41564159
// CHECK-LABEL: test_mm512_mask_ror_epi32
41574160
// CHECK: @llvm.fshr.v16i32
41584161
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
41594162
return _mm512_mask_ror_epi32(__W, __U, __A, 5);
41604163
}
4164+
TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921));
41614165

41624166
__m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
41634167
// CHECK-LABEL: test_mm512_maskz_ror_epi32
41644168
// CHECK: @llvm.fshr.v16i32
41654169
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
41664170
return _mm512_maskz_ror_epi32(__U, __A, 5);
41674171
}
4172+
TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0));
41684173

41694174
__m512i test_mm512_ror_epi64(__m512i __A) {
41704175
// CHECK-LABEL: test_mm512_ror_epi64
41714176
// CHECK: @llvm.fshr.v8i64
41724177
return _mm512_ror_epi64(__A, 5);
41734178
}
4179+
TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL));
41744180

41754181
__m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
41764182
// CHECK-LABEL: test_mm512_mask_ror_epi64
41774183
// CHECK: @llvm.fshr.v8i64
41784184
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
41794185
return _mm512_mask_ror_epi64(__W, __U, __A, 5);
41804186
}
4187+
TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99));
41814188

41824189
__m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
41834190
// CHECK-LABEL: test_mm512_maskz_ror_epi64
41844191
// CHECK: @llvm.fshr.v8i64
41854192
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
41864193
return _mm512_maskz_ror_epi64(__U, __A, 5);
41874194
}
4188-
4195+
TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0));
41894196

41904197
__m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) {
41914198
// CHECK-LABEL: test_mm512_rorv_epi32

0 commit comments

Comments
 (0)