llvm
diff --git a/‎clang-tools-extra/docs/clang-tidy/checks/modernize/use-ranges.rst‎
Lines changed: 3 additions & 0 deletions b/‎clang-tools-extra/docs/clang-tidy/checks/modernize/use-ranges.rst‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎clang/docs/LanguageExtensions.rst‎
Lines changed: 4 additions & 3 deletions b/‎clang/docs/LanguageExtensions.rst‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 2 additions & 1 deletion b/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎clang/include/clang/Basic/Builtins.td‎
Lines changed: 2 additions & 2 deletions b/‎clang/include/clang/Basic/Builtins.td‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎clang/lib/AST/ExprConstant.cpp‎
Lines changed: 11 additions & 1 deletion b/‎clang/lib/AST/ExprConstant.cpp‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎clang/lib/CodeGen/Targets/AArch64.cpp‎
Lines changed: 44 additions & 24 deletions b/‎clang/lib/CodeGen/Targets/AArch64.cpp‎
Lines changed: 44 additions & 24 deletions
diff --git a/‎clang/lib/Headers/avx10_2_512convertintrin.h‎
Lines changed: 2 additions & 2 deletions b/‎clang/lib/Headers/avx10_2_512convertintrin.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎clang/lib/Headers/avx10_2convertintrin.h‎
Lines changed: 2 additions & 2 deletions b/‎clang/lib/Headers/avx10_2convertintrin.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎clang/lib/Serialization/ASTReader.cpp‎
Lines changed: 2 additions & 1 deletion b/‎clang/lib/Serialization/ASTReader.cpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎clang/test/CodeGen/AArch64/pure-scalable-args.c‎
Lines changed: 19 additions & 0 deletions b/‎clang/test/CodeGen/AArch64/pure-scalable-args.c‎
Lines changed: 19 additions & 0 deletions
@@ -104,6 +104,9 @@ Calls to the following std library algorithms are checked:
 ``std::unique``,
 ``std::upper_bound``.
 
+Note: some range algorithms for ``vector<bool>`` require C++23 because it uses
+proxy iterators.
+
 Reverse Iteration
 -----------------
 
 
@@ -736,9 +736,10 @@ at the end to the next power of 2.
 
 These reductions support both fixed-sized and scalable vector types.
 
-The integer reduction intrinsics, including ``__builtin_reduce_add``,
-``__builtin_reduce_mul``, ``__builtin_reduce_and``, ``__builtin_reduce_or``,
-and ``__builtin_reduce_xor``, can be called in a ``constexpr`` context.
+The integer reduction intrinsics, including ``__builtin_reduce_max``,
+``__builtin_reduce_min``, ``__builtin_reduce_add``, ``__builtin_reduce_mul``,
+``__builtin_reduce_and``, ``__builtin_reduce_or``, and ``__builtin_reduce_xor``,
+can be called in a ``constexpr`` context.
 
 Example:
 
 
@@ -421,7 +421,8 @@ Non-comprehensive list of changes in this release
   ``__builtin_reduce_mul``, ``__builtin_reduce_and``, ``__builtin_reduce_or``,
   ``__builtin_reduce_xor``, ``__builtin_elementwise_popcount``,
   ``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
-  ``__builtin_elementwise_sub_sat``.
+  ``__builtin_elementwise_sub_sat``, ``__builtin_reduce_min`` (For integral element type),
+  ``__builtin_reduce_max`` (For integral element type).
 
 - Clang now rejects ``_BitInt`` matrix element types if the bit width is less than ``CHAR_WIDTH`` or
   not a power of two, matching preexisting behaviour for vector types.
 
@@ -1462,13 +1462,13 @@ def ElementwiseSubSat : Builtin {
 
 def ReduceMax : Builtin {
   let Spellings = ["__builtin_reduce_max"];
-  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
   let Prototype = "void(...)";
 }
 
 def ReduceMin : Builtin {
   let Spellings = ["__builtin_reduce_min"];
-  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
   let Prototype = "void(...)";
 }
 
 
@@ -13604,7 +13604,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case Builtin::BI__builtin_reduce_mul:
   case Builtin::BI__builtin_reduce_and:
   case Builtin::BI__builtin_reduce_or:
-  case Builtin::BI__builtin_reduce_xor: {
+  case Builtin::BI__builtin_reduce_xor:
+  case Builtin::BI__builtin_reduce_min:
+  case Builtin::BI__builtin_reduce_max: {
     APValue Source;
     if (!EvaluateAsRValue(Info, E->getArg(0), Source))
       return false;
@@ -13641,6 +13643,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         Reduced ^= Source.getVectorElt(EltNum).getInt();
         break;
       }
+      case Builtin::BI__builtin_reduce_min: {
+        Reduced = std::min(Reduced, Source.getVectorElt(EltNum).getInt());
+        break;
+      }
+      case Builtin::BI__builtin_reduce_max: {
+        Reduced = std::max(Reduced, Source.getVectorElt(EltNum).getInt());
+        break;
+      }
       }
     }
 
 
@@ -52,6 +52,7 @@ class AArch64ABIInfo : public ABIInfo {
 
   bool isIllegalVectorType(QualType Ty) const;
 
+  bool passAsAggregateType(QualType Ty) const;
   bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
                               SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;
 
@@ -337,6 +338,10 @@ ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
   NSRN += NVec;
   NPRN += NPred;
 
+  // Handle SVE vector tuples.
+  if (Ty->isSVESizelessBuiltinType())
+    return ABIArgInfo::getDirect();
+
   llvm::Type *UnpaddedCoerceToType =
       UnpaddedCoerceToSeq.size() == 1
           ? UnpaddedCoerceToSeq[0]
@@ -362,7 +367,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
   if (isIllegalVectorType(Ty))
     return coerceIllegalVector(Ty, NSRN, NPRN);
 
-  if (!isAggregateTypeForABI(Ty)) {
+  if (!passAsAggregateType(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
@@ -417,7 +422,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
   // elsewhere for GNU compatibility.
   uint64_t Size = getContext().getTypeSize(Ty);
   bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
-  if (IsEmpty || Size == 0) {
+  if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
       return ABIArgInfo::getIgnore();
 
@@ -504,7 +509,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
   if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
     return getNaturalAlignIndirect(RetTy);
 
-  if (!isAggregateTypeForABI(RetTy)) {
+  if (!passAsAggregateType(RetTy)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
       RetTy = EnumTy->getDecl()->getIntegerType();
@@ -519,7 +524,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
   }
 
   uint64_t Size = getContext().getTypeSize(RetTy);
-  if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
+  if (!RetTy->isSVESizelessBuiltinType() &&
+      (isEmptyRecord(getContext(), RetTy, true) || Size == 0))
     return ABIArgInfo::getIgnore();
 
   const Type *Base = nullptr;
@@ -654,6 +660,15 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
   return true;
 }
 
+bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
+  if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
+    const auto *BT = Ty->getAs<BuiltinType>();
+    return !BT->isSVECount() &&
+           getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
+  }
+  return isAggregateTypeForABI(Ty);
+}
+
 // Check if a type needs to be passed in registers as a Pure Scalable Type (as
 // defined by AAPCS64). Return the number of data vectors and the number of
 // predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
@@ -719,37 +734,38 @@ bool AArch64ABIInfo::passAsPureScalableType(
     return true;
   }
 
-  const auto *VT = Ty->getAs<VectorType>();
-  if (!VT)
-    return false;
+  if (const auto *VT = Ty->getAs<VectorType>()) {
+    if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
+      ++NPred;
+      if (CoerceToSeq.size() + 1 > 12)
+        return false;
+      CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
+      return true;
+    }
 
-  if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
-    ++NPred;
-    if (CoerceToSeq.size() + 1 > 12)
-      return false;
-    CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
-    return true;
-  }
+    if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
+      ++NVec;
+      if (CoerceToSeq.size() + 1 > 12)
+        return false;
+      CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
+      return true;
+    }
 
-  if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
-    ++NVec;
-    if (CoerceToSeq.size() + 1 > 12)
-      return false;
-    CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
-    return true;
+    return false;
   }
 
-  if (!VT->isBuiltinType())
+  if (!Ty->isBuiltinType())
     return false;
 
-  switch (cast<BuiltinType>(VT)->getKind()) {
+  bool isPredicate;
+  switch (Ty->getAs<BuiltinType>()->getKind()) {
 #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
   case BuiltinType::Id:                                                        \
-    ++NVec;                                                                    \
+    isPredicate = false;                                                       \
     break;
 #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
   case BuiltinType::Id:                                                        \
-    ++NPred;                                                                   \
+    isPredicate = true;                                                        \
     break;
 #define SVE_TYPE(Name, Id, SingletonId)
 #include "clang/Basic/AArch64SVEACLETypes.def"
@@ -761,6 +777,10 @@ bool AArch64ABIInfo::passAsPureScalableType(
       getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
   assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
          "Expected 1, 2, 3 or 4 vectors!");
+  if (isPredicate)
+    NPred += Info.NumVectors;
+  else
+    NVec += Info.NumVectors;
   auto VTy = llvm::ScalableVectorType::get(CGT.ConvertType(Info.ElementType),
                                            Info.EC.getKnownMinValue());
 
 
@@ -308,13 +308,13 @@ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {
 }
 
 static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {
+_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) {
   return _mm512_castsi512_ph(
       _mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
 }
 
 static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {
+_mm512_maskz_cvtpbf8_ph(__mmask32 __U, __m256i __A) {
   return _mm512_castsi512_ph(
       _mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
 }
 
@@ -580,13 +580,13 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) {
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask8 __U, __m128i __A) {
+_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
   return _mm256_castsi256_ph(
       _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {
+_mm256_maskz_cvtpbf8_ph(__mmask16 __U, __m128i __A) {
   return _mm256_castsi256_ph(
       _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
 }
 
@@ -10642,7 +10642,8 @@ void ASTReader::FinishedDeserializing() {
     // We do this now rather than in finishPendingActions because we want to
     // be able to walk the complete redeclaration chains of the updated decls.
     while (!PendingExceptionSpecUpdates.empty() ||
-           !PendingDeducedTypeUpdates.empty()) {
+           !PendingDeducedTypeUpdates.empty() ||
+           !PendingUndeducedFunctionDecls.empty()) {
       auto ESUpdates = std::move(PendingExceptionSpecUpdates);
       PendingExceptionSpecUpdates.clear();
       for (auto Update : ESUpdates) {
 
@@ -459,3 +459,22 @@ void test_va_arg(int n, ...) {
 // CHECK-DARWIN-NEXT:   call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
 // CHECK-DARWIN-NEXT:   ret void
 // CHECK-DARWIN-NEXT: }
+
+// Regression test for incorrect passing of SVE vector tuples
+// The whole `y` need to be passed indirectly.
+void test_tuple_reg_count(svfloat32_t x, svfloat32x2_t y) {
+  void test_tuple_reg_count_callee(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t,
+                                   svfloat32_t, svfloat32_t, svfloat32_t, svfloat32x2_t);
+  test_tuple_reg_count_callee(x, x, x, x, x, x, x, y);
+}
+// CHECK-AAPCS: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, ptr noundef)
+// CHECK-DARWIN: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+// Regression test for incorrect passing of SVE vector tuples
+// The whole `y` need to be passed indirectly.
+void test_tuple_reg_count_bool(svboolx4_t x, svboolx4_t y) {
+  void test_tuple_reg_count_bool_callee(svboolx4_t, svboolx4_t);
+  test_tuple_reg_count_bool_callee(x, y);
+}
+// CHECK-AAPCS:  declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, ptr noundef)
+// CHECK-DARWIN: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
Original file line number	Diff line number	Diff line change
`@@ -1462,13 +1462,13 @@ def ElementwiseSubSat : Builtin {`
`1462`	`1462`
`1463`	`1463`	`def ReduceMax : Builtin {`
`1464`	`1464`	`let Spellings = ["__builtin_reduce_max"];`
`1465`		`- let Attributes = [NoThrow, Const, CustomTypeChecking];`
	`1465`	`+ let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];`
`1466`	`1466`	`let Prototype = "void(...)";`
`1467`	`1467`	`}`
`1468`	`1468`
`1469`	`1469`	`def ReduceMin : Builtin {`
`1470`	`1470`	`let Spellings = ["__builtin_reduce_min"];`
`1471`		`- let Attributes = [NoThrow, Const, CustomTypeChecking];`
	`1471`	`+ let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];`
`1472`	`1472`	`let Prototype = "void(...)";`
`1473`	`1473`	`}`
`1474`	`1474`
Original file line number	Diff line number	Diff line change
`@@ -308,13 +308,13 @@ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {`
`308`	`308`	`}`
`309`	`309`
`310`	`310`	`static __inline __m512h __DEFAULT_FN_ATTRS512`
`311`		`-_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {`
	`311`	`+_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) {`
`312`	`312`	`return _mm512_castsi512_ph(`
`313`	`313`	`_mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));`
`314`	`314`	`}`
`315`	`315`
`316`	`316`	`static __inline __m512h __DEFAULT_FN_ATTRS512`
`317`		`-_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {`
	`317`	`+_mm512_maskz_cvtpbf8_ph(__mmask32 __U, __m256i __A) {`
`318`	`318`	`return _mm512_castsi512_ph(`
`319`	`319`	`_mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));`
`320`	`320`	`}`
Original file line number	Diff line number	Diff line change
`@@ -580,13 +580,13 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) {`
`580`	`580`	`}`
`581`	`581`
`582`	`582`	`static __inline__ __m256h __DEFAULT_FN_ATTRS256`
`583`		`-_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask8 __U, __m128i __A) {`
	`583`	`+_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {`
`584`	`584`	`return _mm256_castsi256_ph(`
`585`	`585`	`_mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));`
`586`	`586`	`}`
`587`	`587`
`588`	`588`	`static __inline__ __m256h __DEFAULT_FN_ATTRS256`
`589`		`-_mm256_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {`
	`589`	`+_mm256_maskz_cvtpbf8_ph(__mmask16 __U, __m128i __A) {`
`590`	`590`	`return _mm256_castsi256_ph(`
`591`	`591`	`_mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));`
`592`	`592`	`}`