-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[clang] VectorExprEvaluator::VisitCallExpr - add constant folding for X86 pslldqi/psrldqi intrinsics #157403
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] VectorExprEvaluator::VisitCallExpr - add constant folding for X86 pslldqi/psrldqi intrinsics #157403
Changes from 22 commits
38b90ec
eb258ba
f93dc8f
eb95364
f00eec1
015c774
bed3603
afe7818
6b0dc7b
2c6d360
9435420
e7356b2
ee6874b
814495e
7273801
2eed8b4
2b02883
a587fe8
3f4f873
7462ae3
6164028
fd1470e
4517c68
bc3739a
05efd71
7811cb8
6a07417
bb0f797
ce61b54
e86f26c
568b6fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -267,7 +267,8 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in { | |
def _mm_pause : X86LibBuiltin<"void()">; | ||
} | ||
|
||
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { | ||
let Features = "sse2", | ||
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { | ||
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; | ||
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; | ||
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; | ||
|
@@ -568,7 +569,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in | |
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; | ||
} | ||
|
||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { | ||
let Features = "avx2", | ||
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { | ||
|
||
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; | ||
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; | ||
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; | ||
|
@@ -2047,7 +2049,8 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect | |
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; | ||
} | ||
|
||
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { | ||
let Features = "avx512bw", | ||
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { | ||
|
||
def psraw512 | ||
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; | ||
def psrlw512 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12127,6 +12127,49 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { | |
|
||
return Success(APValue(ResultElements.data(), ResultElements.size()), E); | ||
} | ||
case X86::BI__builtin_ia32_pslldqi128_byteshift: | ||
case X86::BI__builtin_ia32_psrldqi128_byteshift: | ||
case X86::BI__builtin_ia32_pslldqi256_byteshift: | ||
case X86::BI__builtin_ia32_psrldqi256_byteshift: | ||
case X86::BI__builtin_ia32_pslldqi512_byteshift: | ||
case X86::BI__builtin_ia32_psrldqi512_byteshift: { | ||
APSInt Amt; | ||
if (!EvaluateInteger(E->getArg(1), Amt, Info)) | ||
return false; | ||
unsigned ShiftVal = (unsigned)Amt.getZExtValue() & 0xff; | ||
|
||
APValue Vec; | ||
if (!Evaluate(Vec, Info, E->getArg(0)) || !Vec.isVector()) | ||
return false; | ||
|
||
unsigned NumElts = Vec.getVectorLength(); | ||
const unsigned LaneBytes = 16; | ||
assert(NumElts % LaneBytes == 0); | ||
|
||
SmallVector<APValue, 64> Result; | ||
Result.resize(NumElts, APValue(0)); | ||
|
||
bool IsLeft = | ||
(E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi128_byteshift || | ||
E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi256_byteshift || | ||
E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi512_byteshift); | ||
|
||
if (ShiftVal >= LaneBytes) | ||
return ZeroInitialization(E); | ||
|
||
for (unsigned LaneBase = 0; LaneBase < NumElts; LaneBase += LaneBytes) { | ||
for (unsigned I = 0; I < LaneBytes; ++I) { | ||
int src = IsLeft ? (I + ShiftVal) : (int)I - (int)ShiftVal; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (style) |
||
|
||
if (src >= 0 && (unsigned)src < LaneBytes) | ||
Result[LaneBase + I] = Vec.getVectorElt(LaneBase + (unsigned)src); | ||
else | ||
Result[LaneBase + I] = APValue(0); | ||
} | ||
} | ||
|
||
return Success(APValue(Result.data(), Result.size()), E); | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1553,13 +1553,18 @@ __m128i test_mm_srli_si128(__m128i A) { | |
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> | ||
return _mm_srli_si128(A, 5); | ||
} | ||
TEST_CONSTEXPR(match_v16qi(_mm_slli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 3), 0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13)) | ||
TEST_CONSTEXPR(match_v16qi(_mm_slli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) | ||
|
||
__m128i test_mm_srli_si128_2(__m128i A) { | ||
// CHECK-LABEL: test_mm_srli_si128_2 | ||
// ret <2 x i64> zeroinitializer | ||
return _mm_srli_si128(A, 17); | ||
} | ||
|
||
TEST_CONSTEXPR(match_v16qi(_mm_srli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 3), 4,5,6,7,8,9,10,11,12,13,14,15,16,0,0,0)) | ||
TEST_CONSTEXPR(match_v16qi(_mm_srli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ALL of these tests are still failing because you're missing brackets around the vector initialisations (because _mm_srli_si128 is a macro, it expands early).
Update all your other TEST_CONSTEXPR accordingly |
||
|
||
void test_mm_store_pd(double* A, __m128d B) { | ||
// CHECK-LABEL: test_mm_store_pd | ||
// CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can only add the constexpr attribute to the byteshift intrinsics - better to move the byteshift declarations into the constexpr block below (psrldi128 et al.)