Skip to content

Commit a87d409

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-compute-iv-end-values
2 parents 4bd9416 + 3cc311a commit a87d409

File tree

168 files changed

+7186
-1094
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

168 files changed

+7186
-1094
lines changed

clang-tools-extra/docs/clang-tidy/checks/modernize/use-ranges.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ Calls to the following std library algorithms are checked:
104104
``std::unique``,
105105
``std::upper_bound``.
106106

107+
Note: some range algorithms for ``vector<bool>`` require C++23 because it uses
108+
proxy iterators.
109+
107110
Reverse Iteration
108111
-----------------
109112

clang/docs/LanguageExtensions.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -736,9 +736,10 @@ at the end to the next power of 2.
736736

737737
These reductions support both fixed-sized and scalable vector types.
738738

739-
The integer reduction intrinsics, including ``__builtin_reduce_add``,
740-
``__builtin_reduce_mul``, ``__builtin_reduce_and``, ``__builtin_reduce_or``,
741-
and ``__builtin_reduce_xor``, can be called in a ``constexpr`` context.
739+
The integer reduction intrinsics, including ``__builtin_reduce_max``,
740+
``__builtin_reduce_min``, ``__builtin_reduce_add``, ``__builtin_reduce_mul``,
741+
``__builtin_reduce_and``, ``__builtin_reduce_or``, and ``__builtin_reduce_xor``,
742+
can be called in a ``constexpr`` context.
742743

743744
Example:
744745

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,8 @@ Non-comprehensive list of changes in this release
421421
``__builtin_reduce_mul``, ``__builtin_reduce_and``, ``__builtin_reduce_or``,
422422
``__builtin_reduce_xor``, ``__builtin_elementwise_popcount``,
423423
``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
424-
``__builtin_elementwise_sub_sat``.
424+
``__builtin_elementwise_sub_sat``, ``__builtin_reduce_min`` (For integral element type),
425+
``__builtin_reduce_max`` (For integral element type).
425426

426427
- Clang now rejects ``_BitInt`` matrix element types if the bit width is less than ``CHAR_WIDTH`` or
427428
not a power of two, matching preexisting behaviour for vector types.

clang/include/clang/Basic/Builtins.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,13 +1462,13 @@ def ElementwiseSubSat : Builtin {
14621462

14631463
def ReduceMax : Builtin {
14641464
let Spellings = ["__builtin_reduce_max"];
1465-
let Attributes = [NoThrow, Const, CustomTypeChecking];
1465+
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
14661466
let Prototype = "void(...)";
14671467
}
14681468

14691469
def ReduceMin : Builtin {
14701470
let Spellings = ["__builtin_reduce_min"];
1471-
let Attributes = [NoThrow, Const, CustomTypeChecking];
1471+
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
14721472
let Prototype = "void(...)";
14731473
}
14741474

clang/lib/AST/ExprConstant.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13604,7 +13604,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1360413604
case Builtin::BI__builtin_reduce_mul:
1360513605
case Builtin::BI__builtin_reduce_and:
1360613606
case Builtin::BI__builtin_reduce_or:
13607-
case Builtin::BI__builtin_reduce_xor: {
13607+
case Builtin::BI__builtin_reduce_xor:
13608+
case Builtin::BI__builtin_reduce_min:
13609+
case Builtin::BI__builtin_reduce_max: {
1360813610
APValue Source;
1360913611
if (!EvaluateAsRValue(Info, E->getArg(0), Source))
1361013612
return false;
@@ -13641,6 +13643,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1364113643
Reduced ^= Source.getVectorElt(EltNum).getInt();
1364213644
break;
1364313645
}
13646+
case Builtin::BI__builtin_reduce_min: {
13647+
Reduced = std::min(Reduced, Source.getVectorElt(EltNum).getInt());
13648+
break;
13649+
}
13650+
case Builtin::BI__builtin_reduce_max: {
13651+
Reduced = std::max(Reduced, Source.getVectorElt(EltNum).getInt());
13652+
break;
13653+
}
1364413654
}
1364513655
}
1364613656

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class AArch64ABIInfo : public ABIInfo {
5252

5353
bool isIllegalVectorType(QualType Ty) const;
5454

55+
bool passAsAggregateType(QualType Ty) const;
5556
bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
5657
SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;
5758

@@ -337,6 +338,10 @@ ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
337338
NSRN += NVec;
338339
NPRN += NPred;
339340

341+
// Handle SVE vector tuples.
342+
if (Ty->isSVESizelessBuiltinType())
343+
return ABIArgInfo::getDirect();
344+
340345
llvm::Type *UnpaddedCoerceToType =
341346
UnpaddedCoerceToSeq.size() == 1
342347
? UnpaddedCoerceToSeq[0]
@@ -362,7 +367,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
362367
if (isIllegalVectorType(Ty))
363368
return coerceIllegalVector(Ty, NSRN, NPRN);
364369

365-
if (!isAggregateTypeForABI(Ty)) {
370+
if (!passAsAggregateType(Ty)) {
366371
// Treat an enum type as its underlying type.
367372
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
368373
Ty = EnumTy->getDecl()->getIntegerType();
@@ -417,7 +422,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
417422
// elsewhere for GNU compatibility.
418423
uint64_t Size = getContext().getTypeSize(Ty);
419424
bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
420-
if (IsEmpty || Size == 0) {
425+
if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
421426
if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
422427
return ABIArgInfo::getIgnore();
423428

@@ -504,7 +509,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
504509
if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
505510
return getNaturalAlignIndirect(RetTy);
506511

507-
if (!isAggregateTypeForABI(RetTy)) {
512+
if (!passAsAggregateType(RetTy)) {
508513
// Treat an enum type as its underlying type.
509514
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
510515
RetTy = EnumTy->getDecl()->getIntegerType();
@@ -519,7 +524,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
519524
}
520525

521526
uint64_t Size = getContext().getTypeSize(RetTy);
522-
if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
527+
if (!RetTy->isSVESizelessBuiltinType() &&
528+
(isEmptyRecord(getContext(), RetTy, true) || Size == 0))
523529
return ABIArgInfo::getIgnore();
524530

525531
const Type *Base = nullptr;
@@ -654,6 +660,15 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
654660
return true;
655661
}
656662

663+
bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
664+
if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
665+
const auto *BT = Ty->getAs<BuiltinType>();
666+
return !BT->isSVECount() &&
667+
getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
668+
}
669+
return isAggregateTypeForABI(Ty);
670+
}
671+
657672
// Check if a type needs to be passed in registers as a Pure Scalable Type (as
658673
// defined by AAPCS64). Return the number of data vectors and the number of
659674
// predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
@@ -719,37 +734,38 @@ bool AArch64ABIInfo::passAsPureScalableType(
719734
return true;
720735
}
721736

722-
const auto *VT = Ty->getAs<VectorType>();
723-
if (!VT)
724-
return false;
737+
if (const auto *VT = Ty->getAs<VectorType>()) {
738+
if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
739+
++NPred;
740+
if (CoerceToSeq.size() + 1 > 12)
741+
return false;
742+
CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
743+
return true;
744+
}
725745

726-
if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
727-
++NPred;
728-
if (CoerceToSeq.size() + 1 > 12)
729-
return false;
730-
CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
731-
return true;
732-
}
746+
if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
747+
++NVec;
748+
if (CoerceToSeq.size() + 1 > 12)
749+
return false;
750+
CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
751+
return true;
752+
}
733753

734-
if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
735-
++NVec;
736-
if (CoerceToSeq.size() + 1 > 12)
737-
return false;
738-
CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
739-
return true;
754+
return false;
740755
}
741756

742-
if (!VT->isBuiltinType())
757+
if (!Ty->isBuiltinType())
743758
return false;
744759

745-
switch (cast<BuiltinType>(VT)->getKind()) {
760+
bool isPredicate;
761+
switch (Ty->getAs<BuiltinType>()->getKind()) {
746762
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
747763
case BuiltinType::Id: \
748-
++NVec; \
764+
isPredicate = false; \
749765
break;
750766
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
751767
case BuiltinType::Id: \
752-
++NPred; \
768+
isPredicate = true; \
753769
break;
754770
#define SVE_TYPE(Name, Id, SingletonId)
755771
#include "clang/Basic/AArch64SVEACLETypes.def"
@@ -761,6 +777,10 @@ bool AArch64ABIInfo::passAsPureScalableType(
761777
getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
762778
assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
763779
"Expected 1, 2, 3 or 4 vectors!");
780+
if (isPredicate)
781+
NPred += Info.NumVectors;
782+
else
783+
NVec += Info.NumVectors;
764784
auto VTy = llvm::ScalableVectorType::get(CGT.ConvertType(Info.ElementType),
765785
Info.EC.getKnownMinValue());
766786

clang/lib/Headers/avx10_2_512convertintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,13 +308,13 @@ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {
308308
}
309309

310310
static __inline __m512h __DEFAULT_FN_ATTRS512
311-
_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {
311+
_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) {
312312
return _mm512_castsi512_ph(
313313
_mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
314314
}
315315

316316
static __inline __m512h __DEFAULT_FN_ATTRS512
317-
_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {
317+
_mm512_maskz_cvtpbf8_ph(__mmask32 __U, __m256i __A) {
318318
return _mm512_castsi512_ph(
319319
_mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
320320
}

clang/lib/Headers/avx10_2convertintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,13 +580,13 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) {
580580
}
581581

582582
static __inline__ __m256h __DEFAULT_FN_ATTRS256
583-
_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask8 __U, __m128i __A) {
583+
_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
584584
return _mm256_castsi256_ph(
585585
_mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
586586
}
587587

588588
static __inline__ __m256h __DEFAULT_FN_ATTRS256
589-
_mm256_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {
589+
_mm256_maskz_cvtpbf8_ph(__mmask16 __U, __m128i __A) {
590590
return _mm256_castsi256_ph(
591591
_mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
592592
}

clang/lib/Serialization/ASTReader.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10642,7 +10642,8 @@ void ASTReader::FinishedDeserializing() {
1064210642
// We do this now rather than in finishPendingActions because we want to
1064310643
// be able to walk the complete redeclaration chains of the updated decls.
1064410644
while (!PendingExceptionSpecUpdates.empty() ||
10645-
!PendingDeducedTypeUpdates.empty()) {
10645+
!PendingDeducedTypeUpdates.empty() ||
10646+
!PendingUndeducedFunctionDecls.empty()) {
1064610647
auto ESUpdates = std::move(PendingExceptionSpecUpdates);
1064710648
PendingExceptionSpecUpdates.clear();
1064810649
for (auto Update : ESUpdates) {

clang/test/CodeGen/AArch64/pure-scalable-args.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,3 +459,22 @@ void test_va_arg(int n, ...) {
459459
// CHECK-DARWIN-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
460460
// CHECK-DARWIN-NEXT: ret void
461461
// CHECK-DARWIN-NEXT: }
462+
463+
// Regression test for incorrect passing of SVE vector tuples
464+
// The whole `y` need to be passed indirectly.
465+
void test_tuple_reg_count(svfloat32_t x, svfloat32x2_t y) {
466+
void test_tuple_reg_count_callee(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t,
467+
svfloat32_t, svfloat32_t, svfloat32_t, svfloat32x2_t);
468+
test_tuple_reg_count_callee(x, x, x, x, x, x, x, y);
469+
}
470+
// CHECK-AAPCS: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, ptr noundef)
471+
// CHECK-DARWIN: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
472+
473+
// Regression test for incorrect passing of SVE vector tuples
474+
// The whole `y` need to be passed indirectly.
475+
void test_tuple_reg_count_bool(svboolx4_t x, svboolx4_t y) {
476+
void test_tuple_reg_count_bool_callee(svboolx4_t, svboolx4_t);
477+
test_tuple_reg_count_bool_callee(x, y);
478+
}
479+
// CHECK-AAPCS: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, ptr noundef)
480+
// CHECK-DARWIN: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)

0 commit comments

Comments
 (0)