llvm
diff --git a/‎.github/new-prs-labeler.yml‎
Lines changed: 3 additions & 0 deletions b/‎.github/new-prs-labeler.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/workflows/llvm-tests.yml‎ renamed to ‎.github/workflows/llvm-abi-tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/llvm-tests.yml‎ renamed to ‎.github/workflows/llvm-abi-tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 6 additions & 0 deletions b/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎clang/include/clang/Basic/BuiltinsNVPTX.td‎
Lines changed: 57 additions & 0 deletions b/‎clang/include/clang/Basic/BuiltinsNVPTX.td‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎clang/include/clang/Basic/LangOptions.def‎
Lines changed: 0 additions & 1 deletion b/‎clang/include/clang/Basic/LangOptions.def‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h‎
Lines changed: 4 additions & 3 deletions b/‎clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎clang/include/clang/CIR/Dialect/IR/CIROps.td‎
Lines changed: 6 additions & 4 deletions b/‎clang/include/clang/CIR/Dialect/IR/CIROps.td‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎clang/include/clang/Driver/Options.td‎
Lines changed: 1 addition & 2 deletions b/‎clang/include/clang/Driver/Options.td‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎clang/include/clang/Sema/SemaConcept.h‎
Lines changed: 2 additions & 0 deletions b/‎clang/include/clang/Sema/SemaConcept.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎clang/lib/AST/ByteCode/InterpBuiltin.cpp‎
Lines changed: 9 additions & 24 deletions b/‎clang/lib/AST/ByteCode/InterpBuiltin.cpp‎
Lines changed: 9 additions & 24 deletions
@@ -1121,3 +1121,6 @@ tablegen:
   - llvm/include/TableGen/**
   - llvm/lib/TableGen/**
   - llvm/utils/TableGen/**
+
+infrastructure:
+  - .ci/**
@@ -1,4 +1,4 @@
-name: LLVM Tests
+name: LLVM ABI Tests
 
 permissions:
   contents: read
 
@@ -123,6 +123,9 @@ AST Dumping Potentially Breaking Changes
 
     ``__atomic_test_and_set(p, 0)``
 
+- Pretty-printing of templates with inherited (i.e. specified in a previous
+  redeclaration) default arguments has been fixed.
+
 Clang Frontend Potentially Breaking Changes
 -------------------------------------------
 - Members of anonymous unions/structs are now injected as ``IndirectFieldDecl``
@@ -250,6 +253,8 @@ Non-comprehensive list of changes in this release
 
 - ``__builtin_assume_dereferenceable`` now accepts non-constant size operands.
 
+- Fixed a crash when the second argument to ``__builtin_assume_aligned`` was not constant (#GH161314)
+
 New Compiler Flags
 ------------------
 - New option ``-fno-sanitize-debug-trap-reasons`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
@@ -452,6 +457,7 @@ Bug Fixes to AST Handling
 
 Miscellaneous Bug Fixes
 ^^^^^^^^^^^^^^^^^^^^^^^
+- Fixed missing diagnostics of ``diagnose_if`` on templates involved in initialization. (#GH160776)
 
 Miscellaneous Clang Crashes Fixed
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -579,11 +579,35 @@ def __nvvm_ff2bf16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)
 def __nvvm_ff2bf16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2bf16x2_rs :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2bf16x2_rs_relu :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2bf16x2_rs_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2bf16x2_rs_relu_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
 
 def __nvvm_ff2f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2f16x2_rs :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2f16x2_rs_relu :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2f16x2_rs_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2f16x2_rs_relu_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
 
 def __nvvm_f2bf16_rn : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rn_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
@@ -616,6 +640,19 @@ def __nvvm_e4m3x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(sh
 def __nvvm_e5m2x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM_89, PTX81>;
 def __nvvm_e5m2x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM_89, PTX81>;
 
+def __nvvm_f32x4_to_e4m3x4_rs_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e4m3x4_rs_relu_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e5m2x4_rs_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e5m2x4_rs_relu_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+
 def __nvvm_ff_to_e2m3x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_e2m3x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_e3m2x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
@@ -626,12 +663,32 @@ def __nvvm_e2m3x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(sh
 def __nvvm_e3m2x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_e3m2x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 
+def __nvvm_f32x4_to_e2m3x4_rs_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e2m3x4_rs_relu_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e3m2x4_rs_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e3m2x4_rs_relu_satfinite :
+  NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", 
+                       SM<"100a", [SM_103a]>, PTX87>;
+
 def __nvvm_ff_to_e2m1x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_e2m1x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 
 def __nvvm_e2m1x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_e2m1x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 
+def __nvvm_f32x4_to_e2m1x4_rs_satfinite :
+  NVPTXBuiltinSMAndPTX<"short(_Vector<4, float>, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e2m1x4_rs_relu_satfinite :
+  NVPTXBuiltinSMAndPTX<"short(_Vector<4, float>, uint32_t)",
+                       SM<"100a", [SM_103a]>, PTX87>;
+
 def __nvvm_ff_to_ue8m0x2_rz : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_ue8m0x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_ue8m0x2_rp : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 
@@ -245,7 +245,6 @@ LANGOPT(HLSLStrictAvailability, 1, 0, NotCompatible,
 LANGOPT(HLSLSpvUseUnknownImageFormat, 1, 0, NotCompatible, "For storage images and texel buffers, sets the default format to 'Unknown' when not specified via the `vk::image_format` attribute. If this option is not used, the format is inferred from the resource's data type.")
 
 LANGOPT(CUDAIsDevice      , 1, 0, NotCompatible, "compiling for CUDA device")
-LANGOPT(CUDAAllowVariadicFunctions, 1, 0, NotCompatible, "allowing variadic functions in CUDA device code")
 LANGOPT(CUDAHostDeviceConstexpr, 1, 1, NotCompatible, "treating unattributed constexpr functions as __host__ __device__")
 LANGOPT(GPUDeviceApproxTranscendentals, 1, 0, NotCompatible, "using approximate transcendental functions")
 LANGOPT(GPURelocatableDeviceCode, 1, 0, NotCompatible, "generate relocatable device code")
 
@@ -178,9 +178,10 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   }
 
   mlir::Value createComplexImag(mlir::Location loc, mlir::Value operand) {
-    auto operandTy = mlir::cast<cir::ComplexType>(operand.getType());
-    return cir::ComplexImagOp::create(*this, loc, operandTy.getElementType(),
-                                      operand);
+    auto resultType = operand.getType();
+    if (auto complexResultType = mlir::dyn_cast<cir::ComplexType>(resultType))
+      resultType = complexResultType.getElementType();
+    return cir::ComplexImagOp::create(*this, loc, resultType, operand);
   }
 
   cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,
 
@@ -3308,18 +3308,20 @@ def CIR_ComplexRealOp : CIR_Op<"complex.real", [Pure]> {
 def CIR_ComplexImagOp : CIR_Op<"complex.imag", [Pure]> {
   let summary = "Extract the imaginary part of a complex value";
   let description = [{
-    `cir.complex.imag` operation takes an operand of `!cir.complex` type and
-    yields the imaginary part of it.
+    `cir.complex.imag` operation takes an operand of `!cir.complex`, `!cir.int`
+    or `!cir.float`. If the operand is `!cir.complex`, the imag part of it will
+    be returned, otherwise a zero value will be returned.  
 
     Example:
 
     ```mlir
-    %1 = cir.complex.imag %0 : !cir.complex<!cir.float> -> !cir.float
+    %imag = cir.complex.imag %complex : !cir.complex<!cir.float> -> !cir.float
+    %imag = cir.complex.imag %scalar : !cir.float -> !cir.float
     ```
   }];
 
   let results = (outs CIR_AnyIntOrFloatType:$result);
-  let arguments = (ins CIR_ComplexType:$operand);
+  let arguments = (ins CIR_AnyComplexOrIntOrFloatType:$operand);
 
   let assemblyFormat = [{
     $operand `:` qualified(type($operand)) `->` qualified(type($result))
 
@@ -8733,8 +8733,7 @@ def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
   HelpText<"Incorporate CUDA device-side binary into host object file.">,
   MarshallingInfoString<CodeGenOpts<"CudaGpuBinaryFileName">>;
 def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">,
-  HelpText<"Allow variadic functions in CUDA device code.">,
-  MarshallingInfoFlag<LangOpts<"CUDAAllowVariadicFunctions">>;
+  HelpText<"Deprecated; Allow variadic functions in CUDA device code.">;
 def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">,
   HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">,
   MarshallingInfoNegativeFlag<LangOpts<"CUDAHostDeviceConstexpr">>;
 
@@ -257,6 +257,7 @@ struct NormalizedConstraint {
     case ConstraintKind::FoldExpanded:
       return FoldExpanded.Pattern->getBeginLoc();
     }
+    llvm_unreachable("Unknown ConstraintKind enum");
   }
 
   SourceLocation getEndLoc() const {
@@ -270,6 +271,7 @@ struct NormalizedConstraint {
     case ConstraintKind::FoldExpanded:
       return FoldExpanded.Pattern->getEndLoc();
     }
+    llvm_unreachable("Unknown ConstraintKind enum");
   }
 
   SourceRange getSourceRange() const { return {getBeginLoc(), getEndLoc()}; }
 
@@ -736,25 +736,6 @@ static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-/// rotateleft(value, amount)
-static bool interp__builtin_rotate(InterpState &S, CodePtr OpPC,
-                                   const InterpFrame *Frame,
-                                   const CallExpr *Call, bool Right) {
-  APSInt Amount = popToAPSInt(S, Call->getArg(1));
-  APSInt Value = popToAPSInt(S, Call->getArg(0));
-
-  APSInt Result;
-  if (Right)
-    Result = APSInt(Value.rotr(Amount.urem(Value.getBitWidth())),
-                    /*IsUnsigned=*/true);
-  else // Left.
-    Result = APSInt(Value.rotl(Amount.urem(Value.getBitWidth())),
-                    /*IsUnsigned=*/true);
-
-  pushInteger(S, Result, Call->getType());
-  return true;
-}
-
 static bool interp__builtin_ffs(InterpState &S, CodePtr OpPC,
                                 const InterpFrame *Frame,
                                 const CallExpr *Call) {
@@ -2796,7 +2777,7 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
     unsigned LaneBase = (Idx / LaneElts) * LaneElts;
     unsigned LaneIdx = Idx % LaneElts;
     unsigned SrcIdx = Idx;
-    unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3;
+    unsigned Sel = (Ctl >> (2 * (LaneIdx & 0x3))) & 0x3;
     if (ElemBits == 32) {
       SrcIdx = LaneBase + Sel;
     } else {
@@ -2805,8 +2786,6 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
       if (!IsShufHW && !InHigh) {
         SrcIdx = LaneBase + Sel;
       } else if (IsShufHW && InHigh) {
-        unsigned Rel = LaneIdx - HalfSize;
-        Sel = (Ctl >> (2 * Rel)) & 0x3;
         SrcIdx = LaneBase + HalfSize + Sel;
       }
     }
@@ -3162,7 +3141,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case Builtin::BI_rotl:
   case Builtin::BI_lrotl:
   case Builtin::BI_rotl64:
-    return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/false);
+    return interp__builtin_elementwise_int_binop(
+        S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt {
+          return Value.rotl(Amount);
+        });
 
   case Builtin::BI__builtin_rotateright8:
   case Builtin::BI__builtin_rotateright16:
@@ -3173,7 +3155,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case Builtin::BI_rotr:
   case Builtin::BI_lrotr:
   case Builtin::BI_rotr64:
-    return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/true);
+    return interp__builtin_elementwise_int_binop(
+        S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt {
+          return Value.rotr(Amount);
+        });
 
   case Builtin::BI__builtin_ffs:
   case Builtin::BI__builtin_ffsl:
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-name: LLVM Tests`
	`1`	`+name: LLVM ABI Tests`
`2`	`2`
`3`	`3`	`permissions:`
`4`	`4`	`contents: read`
Original file line number	Diff line number	Diff line change
`@@ -178,9 +178,10 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {`
`178`	`178`	`}`
`179`	`179`
`180`	`180`	`mlir::Value createComplexImag(mlir::Location loc, mlir::Value operand) {`
`181`		`- auto operandTy = mlir::cast<cir::ComplexType>(operand.getType());`
`182`		`- return cir::ComplexImagOp::create(*this, loc, operandTy.getElementType(),`
`183`		`- operand);`
	`181`	`+ auto resultType = operand.getType();`
	`182`	`+ if (auto complexResultType = mlir::dyn_cast<cir::ComplexType>(resultType))`
	`183`	`+ resultType = complexResultType.getElementType();`
	`184`	`+ return cir::ComplexImagOp::create(*this, loc, resultType, operand);`
`184`	`185`	`}`
`185`	`186`
`186`	`187`	`cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,`
Original file line number	Diff line number	Diff line change
`@@ -257,6 +257,7 @@ struct NormalizedConstraint {`
`257`	`257`	`case ConstraintKind::FoldExpanded:`
`258`	`258`	`return FoldExpanded.Pattern->getBeginLoc();`
`259`	`259`	`}`
	`260`	`+ llvm_unreachable("Unknown ConstraintKind enum");`
`260`	`261`	`}`
`261`	`262`
`262`	`263`	`SourceLocation getEndLoc() const {`
`@@ -270,6 +271,7 @@ struct NormalizedConstraint {`
`270`	`271`	`case ConstraintKind::FoldExpanded:`
`271`	`272`	`return FoldExpanded.Pattern->getEndLoc();`
`272`	`273`	`}`
	`274`	`+ llvm_unreachable("Unknown ConstraintKind enum");`
`273`	`275`	`}`
`274`	`276`
`275`	`277`	`SourceRange getSourceRange() const { return {getBeginLoc(), getEndLoc()}; }`