Skip to content

Commit 99f0770

Browse files
authored
merge main into amd-staging (llvm#3926)
2 parents 19860bb + 79fa020 commit 99f0770

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+2343
-1813
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,8 @@ Bug Fixes to C++ Support
591591
- Fixed a bug where our ``member-like constrained friend`` checking caused an incorrect analysis of lambda captures. (#GH156225)
592592
- Fixed a crash when implicit conversions from initialize list to arrays of
593593
unknown bound during constant evaluation. (#GH151716)
594+
- Support the dynamic_cast to final class optimization with pointer
595+
authentication enabled. (#GH152601)
594596

595597
Bug Fixes to AST Handling
596598
^^^^^^^^^^^^^^^^^^^^^^^^^

clang/lib/CodeGen/CGExprCXX.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2290,8 +2290,7 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
22902290
bool IsExact = !IsDynamicCastToVoid &&
22912291
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
22922292
DestRecordTy->getAsCXXRecordDecl()->isEffectivelyFinal() &&
2293-
CGM.getCXXABI().shouldEmitExactDynamicCast(DestRecordTy) &&
2294-
!getLangOpts().PointerAuthCalls;
2293+
CGM.getCXXABI().shouldEmitExactDynamicCast(DestRecordTy);
22952294

22962295
std::optional<CGCXXABI::ExactDynamicCastInfo> ExactCastInfo;
22972296
if (IsExact) {

clang/lib/CodeGen/ItaniumCXXABI.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,7 +1741,14 @@ llvm::Value *ItaniumCXXABI::emitExactDynamicCast(
17411741
llvm::BasicBlock *CastFail) {
17421742
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
17431743
const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl();
1744+
auto AuthenticateVTable = [&](Address ThisAddr, const CXXRecordDecl *Decl) {
1745+
if (!CGF.getLangOpts().PointerAuthCalls)
1746+
return;
1747+
(void)CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, Decl,
1748+
CodeGenFunction::VTableAuthMode::MustTrap);
1749+
};
17441750

1751+
bool PerformPostCastAuthentication = false;
17451752
llvm::Value *VTable = nullptr;
17461753
if (ExactCastInfo.RequiresCastToPrimaryBase) {
17471754
// Base appears in at least two different places. Find the most-derived
@@ -1752,8 +1759,16 @@ llvm::Value *ItaniumCXXABI::emitExactDynamicCast(
17521759
emitDynamicCastToVoid(CGF, ThisAddr, SrcRecordTy);
17531760
ThisAddr = Address(PrimaryBase, CGF.VoidPtrTy, ThisAddr.getAlignment());
17541761
SrcDecl = DestDecl;
1762+
// This unauthenticated load is unavoidable, so we're relying on the
1763+
// authenticated load in the dynamic cast to void, and we'll manually
1764+
// authenticate the resulting v-table at the end of the cast check.
1765+
PerformPostCastAuthentication = CGF.getLangOpts().PointerAuthCalls;
1766+
CGPointerAuthInfo StrippingAuthInfo(0, PointerAuthenticationMode::Strip,
1767+
false, false, nullptr);
17551768
Address VTablePtrPtr = ThisAddr.withElementType(CGF.VoidPtrPtrTy);
17561769
VTable = CGF.Builder.CreateLoad(VTablePtrPtr, "vtable");
1770+
if (PerformPostCastAuthentication)
1771+
VTable = CGF.EmitPointerAuthAuth(StrippingAuthInfo, VTable);
17571772
} else
17581773
VTable = CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, SrcDecl);
17591774

@@ -1770,8 +1785,32 @@ llvm::Value *ItaniumCXXABI::emitExactDynamicCast(
17701785
llvm::ConstantInt::get(CGF.PtrDiffTy, -Offset);
17711786
AdjustedThisPtr = CGF.Builder.CreateInBoundsGEP(CGF.CharTy, AdjustedThisPtr,
17721787
OffsetConstant);
1788+
PerformPostCastAuthentication = CGF.getLangOpts().PointerAuthCalls;
17731789
}
17741790

1791+
if (PerformPostCastAuthentication) {
1792+
// If we've changed the object pointer we authenticate the vtable pointer
1793+
// of the resulting object.
1794+
llvm::BasicBlock *NonNullBlock = CGF.Builder.GetInsertBlock();
1795+
llvm::BasicBlock *PostCastAuthSuccess =
1796+
CGF.createBasicBlock("dynamic_cast.postauth.success");
1797+
llvm::BasicBlock *PostCastAuthComplete =
1798+
CGF.createBasicBlock("dynamic_cast.postauth.complete");
1799+
CGF.Builder.CreateCondBr(Success, PostCastAuthSuccess,
1800+
PostCastAuthComplete);
1801+
CGF.EmitBlock(PostCastAuthSuccess);
1802+
Address AdjustedThisAddr =
1803+
Address(AdjustedThisPtr, CGF.IntPtrTy, CGF.getPointerAlign());
1804+
AuthenticateVTable(AdjustedThisAddr, DestDecl);
1805+
CGF.EmitBranch(PostCastAuthComplete);
1806+
CGF.EmitBlock(PostCastAuthComplete);
1807+
llvm::PHINode *PHI = CGF.Builder.CreatePHI(AdjustedThisPtr->getType(), 2);
1808+
PHI->addIncoming(AdjustedThisPtr, PostCastAuthSuccess);
1809+
llvm::Value *NullValue =
1810+
llvm::Constant::getNullValue(AdjustedThisPtr->getType());
1811+
PHI->addIncoming(NullValue, NonNullBlock);
1812+
AdjustedThisPtr = PHI;
1813+
}
17751814
CGF.Builder.CreateCondBr(Success, CastSuccess, CastFail);
17761815
return AdjustedThisPtr;
17771816
}

clang/lib/Headers/avx512vldqintrin.h

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -460,39 +460,39 @@ _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
460460
(__mmask8) __U);
461461
}
462462

463-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
464-
_mm_cvtepi64_pd (__m128i __A) {
463+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
464+
_mm_cvtepi64_pd(__m128i __A) {
465465
return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
466466
}
467467

468-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
469-
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
468+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
469+
_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) {
470470
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
471471
(__v2df)_mm_cvtepi64_pd(__A),
472472
(__v2df)__W);
473473
}
474474

475-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
476-
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
475+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
476+
_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) {
477477
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
478478
(__v2df)_mm_cvtepi64_pd(__A),
479479
(__v2df)_mm_setzero_pd());
480480
}
481481

482-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
483-
_mm256_cvtepi64_pd (__m256i __A) {
482+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
483+
_mm256_cvtepi64_pd(__m256i __A) {
484484
return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
485485
}
486486

487-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
488-
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
487+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
488+
_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
489489
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
490490
(__v4df)_mm256_cvtepi64_pd(__A),
491491
(__v4df)__W);
492492
}
493493

494-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
495-
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
494+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
495+
_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) {
496496
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
497497
(__v4df)_mm256_cvtepi64_pd(__A),
498498
(__v4df)_mm256_setzero_pd());
@@ -519,20 +519,20 @@ _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
519519
(__mmask8) __U);
520520
}
521521

522-
static __inline__ __m128 __DEFAULT_FN_ATTRS256
523-
_mm256_cvtepi64_ps (__m256i __A) {
522+
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
523+
_mm256_cvtepi64_ps(__m256i __A) {
524524
return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
525525
}
526526

527-
static __inline__ __m128 __DEFAULT_FN_ATTRS256
528-
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
527+
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
528+
_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
529529
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
530530
(__v4sf)_mm256_cvtepi64_ps(__A),
531531
(__v4sf)__W);
532532
}
533533

534-
static __inline__ __m128 __DEFAULT_FN_ATTRS256
535-
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
534+
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
535+
_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) {
536536
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
537537
(__v4sf)_mm256_cvtepi64_ps(__A),
538538
(__v4sf)_mm_setzero_ps());
@@ -706,39 +706,39 @@ _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
706706
(__mmask8) __U);
707707
}
708708

709-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
710-
_mm_cvtepu64_pd (__m128i __A) {
709+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
710+
_mm_cvtepu64_pd(__m128i __A) {
711711
return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
712712
}
713713

714-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
715-
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
714+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
715+
_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) {
716716
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
717717
(__v2df)_mm_cvtepu64_pd(__A),
718718
(__v2df)__W);
719719
}
720720

721-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
722-
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
721+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
722+
_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) {
723723
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
724724
(__v2df)_mm_cvtepu64_pd(__A),
725725
(__v2df)_mm_setzero_pd());
726726
}
727727

728-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
729-
_mm256_cvtepu64_pd (__m256i __A) {
728+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
729+
_mm256_cvtepu64_pd(__m256i __A) {
730730
return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
731731
}
732732

733-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
734-
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
733+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
734+
_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
735735
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
736736
(__v4df)_mm256_cvtepu64_pd(__A),
737737
(__v4df)__W);
738738
}
739739

740-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
741-
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
740+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
741+
_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) {
742742
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
743743
(__v4df)_mm256_cvtepu64_pd(__A),
744744
(__v4df)_mm256_setzero_pd());
@@ -765,20 +765,20 @@ _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
765765
(__mmask8) __U);
766766
}
767767

768-
static __inline__ __m128 __DEFAULT_FN_ATTRS256
769-
_mm256_cvtepu64_ps (__m256i __A) {
768+
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
769+
_mm256_cvtepu64_ps(__m256i __A) {
770770
return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
771771
}
772772

773-
static __inline__ __m128 __DEFAULT_FN_ATTRS256
774-
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
773+
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
774+
_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
775775
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
776776
(__v4sf)_mm256_cvtepu64_ps(__A),
777777
(__v4sf)__W);
778778
}
779779

780-
static __inline__ __m128 __DEFAULT_FN_ATTRS256
781-
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
780+
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
781+
_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) {
782782
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
783783
(__v4sf)_mm256_cvtepu64_ps(__A),
784784
(__v4sf)_mm_setzero_ps());

clang/lib/Headers/avx512vlfp16intrin.h

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,13 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) {
4646
return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
4747
}
4848

49-
static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) {
49+
static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
50+
_mm_set1_ph(_Float16 __h) {
5051
return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
5152
}
5253

53-
static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) {
54+
static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
55+
_mm256_set1_ph(_Float16 __h) {
5456
return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
5557
__h, __h, __h, __h, __h, __h, __h, __h};
5658
}
@@ -807,34 +809,35 @@ _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
807809
(__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
808810
}
809811

810-
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
812+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
813+
_mm_cvtepi16_ph(__m128i __A) {
811814
return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
812815
}
813816

814-
static __inline__ __m128h __DEFAULT_FN_ATTRS128
817+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
815818
_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
816819
return (__m128h)__builtin_ia32_selectph_128(
817820
(__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
818821
}
819822

820-
static __inline__ __m128h __DEFAULT_FN_ATTRS128
823+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
821824
_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
822825
return (__m128h)__builtin_ia32_selectph_128(
823826
(__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
824827
}
825828

826-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
829+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
827830
_mm256_cvtepi16_ph(__m256i __A) {
828831
return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
829832
}
830833

831-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
834+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
832835
_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
833836
return (__m256h)__builtin_ia32_selectph_256(
834837
(__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
835838
}
836839

837-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
840+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
838841
_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
839842
return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
840843
(__v16hf)_mm256_cvtepi16_ph(__A),
@@ -911,17 +914,18 @@ _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
911914
(__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
912915
}
913916

914-
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
917+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
918+
_mm_cvtepu16_ph(__m128i __A) {
915919
return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
916920
}
917921

918-
static __inline__ __m128h __DEFAULT_FN_ATTRS128
922+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
919923
_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
920924
return (__m128h)__builtin_ia32_selectph_128(
921925
(__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
922926
}
923927

924-
static __inline__ __m128h __DEFAULT_FN_ATTRS128
928+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
925929
_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
926930
return (__m128h)__builtin_ia32_selectph_128(
927931
(__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());

0 commit comments

Comments
 (0)