-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[clang] VectorExprEvaluator::VisitCallExpr - add constant folding for X86 pslldqi/psrldqi intrinsics #157403
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] VectorExprEvaluator::VisitCallExpr - add constant folding for X86 pslldqi/psrldqi intrinsics #157403
Changes from all commits
38b90ec
eb258ba
f93dc8f
eb95364
f00eec1
015c774
bed3603
afe7818
6b0dc7b
2c6d360
9435420
e7356b2
ee6874b
814495e
7273801
2eed8b4
2b02883
a587fe8
3f4f873
7462ae3
6164028
fd1470e
4517c68
bc3739a
05efd71
7811cb8
6a07417
bb0f797
ce61b54
e86f26c
568b6fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -2878,6 +2878,35 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, | |||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| static bool interp__builtin_byteshift(InterpState &S, CodePtr OpPC, | ||||||
| const CallExpr *Call, uint32_t BuiltinID, | ||||||
| bool isLeft) { | ||||||
| APSInt Amt = popToAPSInt(S, Call->getArg(1)); | ||||||
| unsigned ShiftVal = (unsigned)Amt.getZExtValue() & 0xff; | ||||||
|
|
||||||
| const Pointer &VecPtr = S.Stk.pop<Pointer>(); | ||||||
| const Pointer &Dst = S.Stk.peek<Pointer>(); | ||||||
|
|
||||||
| unsigned NumElts = VecPtr.getNumElems(); | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| const unsigned LaneBytes = 16; | ||||||
| assert(NumElts % LaneBytes == 0); | ||||||
|
|
||||||
| for (unsigned LaneBase = 0; LaneBase < NumElts; LaneBase += LaneBytes) { | ||||||
| for (unsigned I = 0; I < LaneBytes; ++I) { | ||||||
| int Src = isLeft ? (I + ShiftVal) : (int)I - (int)ShiftVal; | ||||||
| if (Src >= 0 && (unsigned)Src < LaneBytes) { | ||||||
| Dst.elem<uint8_t>(LaneBase + I) = | ||||||
| VecPtr.elem<uint8_t>(LaneBase + (unsigned)Src); | ||||||
| } else { | ||||||
| Dst.elem<uint8_t>(LaneBase + I) = 0; | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| Dst.initializeAllElements(); | ||||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, | ||||||
| uint32_t BuiltinID) { | ||||||
| if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) | ||||||
|
|
@@ -3667,6 +3696,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, | |||||
| case Builtin::BI__builtin_elementwise_fshr: | ||||||
| return interp__builtin_elementwise_triop(S, OpPC, Call, | ||||||
| llvm::APIntOps::fshr); | ||||||
| case clang::X86::BI__builtin_ia32_pslldqi128_byteshift: | ||||||
| case clang::X86::BI__builtin_ia32_pslldqi256_byteshift: | ||||||
| case clang::X86::BI__builtin_ia32_pslldqi512_byteshift: | ||||||
| return interp__builtin_byteshift(S, OpPC, Call, BuiltinID, /*IsLeft=*/true); | ||||||
| case clang::X86::BI__builtin_ia32_psrldqi128_byteshift: | ||||||
| case clang::X86::BI__builtin_ia32_psrldqi256_byteshift: | ||||||
| case clang::X86::BI__builtin_ia32_psrldqi512_byteshift: | ||||||
| return interp__builtin_byteshift(S, OpPC, Call, BuiltinID, | ||||||
| /*IsLeft=*/false); | ||||||
|
|
||||||
| case X86::BI__builtin_ia32_insertf32x4_256: | ||||||
| case X86::BI__builtin_ia32_inserti32x4_256: | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12191,6 +12191,50 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { | |
| return Success(APValue(ResultElements.data(), ResultElements.size()), E); | ||
| } | ||
|
|
||
| case X86::BI__builtin_ia32_pslldqi128_byteshift: | ||
| case X86::BI__builtin_ia32_psrldqi128_byteshift: | ||
| case X86::BI__builtin_ia32_pslldqi256_byteshift: | ||
| case X86::BI__builtin_ia32_psrldqi256_byteshift: | ||
| case X86::BI__builtin_ia32_pslldqi512_byteshift: | ||
| case X86::BI__builtin_ia32_psrldqi512_byteshift: { | ||
| APSInt Amt; | ||
| if (!EvaluateInteger(E->getArg(1), Amt, Info)) | ||
| return false; | ||
| unsigned ShiftVal = (unsigned)Amt.getZExtValue() & 0xff; | ||
|
|
||
| APValue Vec; | ||
| if (!Evaluate(Vec, Info, E->getArg(0)) || !Vec.isVector()) | ||
| return false; | ||
|
|
||
| unsigned NumElts = Vec.getVectorLength(); | ||
| const unsigned LaneBytes = 16; | ||
| assert(NumElts % LaneBytes == 0); | ||
|
|
||
| SmallVector<APValue, 64> Result; | ||
| Result.resize(NumElts, APValue(0)); | ||
|
|
||
| bool IsLeft = | ||
| (E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi128_byteshift || | ||
| E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi256_byteshift || | ||
| E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi512_byteshift); | ||
|
|
||
| if (ShiftVal >= LaneBytes) | ||
| return ZeroInitialization(E); | ||
|
|
||
| for (unsigned LaneBase = 0; LaneBase < NumElts; LaneBase += LaneBytes) { | ||
| for (unsigned I = 0; I < LaneBytes; ++I) { | ||
| int src = IsLeft ? (I + ShiftVal) : (int)I - (int)ShiftVal; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (style) |
||
|
|
||
| if (src >= 0 && (unsigned)src < LaneBytes) | ||
| Result[LaneBase + I] = Vec.getVectorElt(LaneBase + (unsigned)src); | ||
| else | ||
| Result[LaneBase + I] = APValue(0); | ||
| } | ||
| } | ||
|
|
||
| return Success(APValue(Result.data(), Result.size()), E); | ||
| } | ||
|
|
||
| case X86::BI__builtin_ia32_insertf32x4_256: | ||
| case X86::BI__builtin_ia32_inserti32x4_256: | ||
| case X86::BI__builtin_ia32_insertf64x2_256: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1562,13 +1562,18 @@ __m128i test_mm_srli_si128(__m128i A) { | |
| // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> | ||
| return _mm_srli_si128(A, 5); | ||
| } | ||
| TEST_CONSTEXPR(match_v16qi(_mm_slli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 3), 0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13)) | ||
| TEST_CONSTEXPR(match_v16qi(_mm_slli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) | ||
|
|
||
| __m128i test_mm_srli_si128_2(__m128i A) { | ||
| // CHECK-LABEL: test_mm_srli_si128_2 | ||
| // ret <2 x i64> zeroinitializer | ||
| return _mm_srli_si128(A, 17); | ||
| } | ||
|
|
||
| TEST_CONSTEXPR(match_v16qi(_mm_srli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 3), 4,5,6,7,8,9,10,11,12,13,14,15,16,0,0,0)) | ||
| TEST_CONSTEXPR(match_v16qi(_mm_srli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All of these tests are still failing because the vector initialisers are missing enclosing parentheses: `_mm_srli_si128` is a macro, so the commas inside an unparenthesised `{...}` initialiser are split into separate macro arguments. Update all your other TEST_CONSTEXPR cases accordingly. |
||
|
|
||
| void test_mm_store_pd(double* A, __m128d B) { | ||
| // CHECK-LABEL: test_mm_store_pd | ||
| // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was able to do it on my previous PC, but since I switched to a new one, I’ve been having trouble building due to storage limitations, so I couldn’t proceed. Sorry about that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’m sorry if I’m causing difficulties in your progress.