
Commit a67cbb3 (2 parents: 9690e48 + 6e904e8)

fix some comments

Created using spr 1.3.7

File tree: 68 files changed (+3766, −686 lines)

Note: large commits have some content hidden by default; only a subset of the 68 changed files appears below.

clang/docs/ReleaseNotes.rst (1 addition, 0 deletions)

@@ -422,6 +422,7 @@ Bug Fixes to C++ Support
   ``__builtin_addressof``, and related issues with builtin arguments. (#GH154034)
 - Fix an assertion failure when taking the address on a non-type template parameter argument of
   object type. (#GH151531)
+- Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409).
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
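A minimal sketch of the pattern this note covers (names are illustrative):

    float f = 1.0f;
    double d1 = f;         // warns under -Wdouble-promotion: implicit float-to-double promotion
    double d2 = double{f}; // list-initialization asks for the promotion explicitly;
                           // no longer warned after this change (#GH33409)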

clang/include/clang/Basic/Builtins.td (6 additions, 0 deletions)

@@ -4945,6 +4945,12 @@ def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLResourceNonUniformIndex : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_resource_nonuniformindex"];
+  let Attributes = [NoThrow];
+  let Prototype = "uint32_t(uint32_t)";
+}
+
 def HLSLAll : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_all"];
   let Attributes = [NoThrow, Const];
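This entry registers __builtin_hlsl_resource_nonuniformindex only under the HLSL language mode, with signature uint32_t(uint32_t). Unlike __builtin_hlsl_all just below it, it is not marked Const; our reading is that this keeps the hint from being folded away as a pure computation. A hypothetical direct call (user code would normally go through the NonUniformResourceIndex wrapper added in hlsl_intrinsics.h later in this commit):

    uint32_t j = __builtin_hlsl_resource_nonuniformindex(i); // `i` is an illustrative uint32_t index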

clang/lib/Analysis/FlowSensitive/Transfer.cpp (6 additions, 1 deletion)

@@ -657,7 +657,12 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     if (LocSrc == nullptr || LocDst == nullptr)
       return;
 
-    copyRecord(*LocSrc, *LocDst, Env);
+    // If the destination object here is of a derived class, `Arg0` may be a
+    // cast of that object to a base class, and the source object may be of a
+    // sibling derived class. To handle these cases, ensure we are copying
+    // only the fields for `Arg0`'s type, not the type of the underlying
+    // `RecordStorageLocation`.
+    copyRecord(*LocSrc, *LocDst, Env, Arg0->getType());
 
     // The assignment operator can have an arbitrary return type. We model the
     // return value only if the return type is the same as or a base class of
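The kind of code the new comment describes, as we read it (a sketch; the types and names are ours, not from the diff):

    struct Base { int b; };
    struct Derived1 : Base { int d1; };
    struct Derived2 : Base { int d2; };

    void f(Derived1 &dst, Derived2 &src) {
      // `Arg0` is the base-class lvalue; the underlying storage locations are
      // the full Derived1/Derived2 objects. Copying by Arg0's type (Base)
      // copies only `b`, the one field the two derived types actually share.
      static_cast<Base &>(dst) = static_cast<Base &>(src);
    }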

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp (16 additions, 4 deletions)

@@ -2140,11 +2140,23 @@ mlir::Value ScalarExprEmitter::VisitRealImag(const UnaryOperator *e,
                        : builder.createComplexImag(loc, complex);
   }
 
-  // __real or __imag on a scalar returns zero. Emit the subexpr to ensure side
+  if (e->getOpcode() == UO_Real) {
+    return promotionTy.isNull() ? Visit(op)
+                                : cgf.emitPromotedScalarExpr(op, promotionTy);
+  }
+
+  // __imag on a scalar returns zero. Emit the subexpr to ensure side
   // effects are evaluated, but not the actual value.
-  cgf.cgm.errorNYI(e->getSourceRange(),
-                   "VisitRealImag __real or __imag on a scalar");
-  return {};
+  if (op->isGLValue())
+    cgf.emitLValue(op);
+  else if (!promotionTy.isNull())
+    cgf.emitPromotedScalarExpr(op, promotionTy);
+  else
+    cgf.emitScalarExpr(op);
+
+  mlir::Type valueTy =
+      cgf.convertType(promotionTy.isNull() ? e->getType() : promotionTy);
+  return builder.getNullValue(valueTy, loc);
 }
 
 /// Return the size or alignment of the type of argument of the sizeof
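In source terms, the GNU-extension semantics this implements for non-complex operands (a sketch):

    float a = 2.0f;
    float r = __real__ a; // just `a`: the real part of a scalar is the value itself
    float i = __imag__ a; // constant zero; `a` is still emitted for its side effects

With a promoted type such as _Float16, the operand (or the zero) is materialized at the promoted type and truncated back, as the new complex.cpp tests below check.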

clang/lib/CodeGen/CGHLSLBuiltins.cpp (7 additions, 0 deletions)

@@ -352,6 +352,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
     return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
   }
+  case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
+    Value *IndexOp = EmitScalarExpr(E->getArg(0));
+    llvm::Type *RetTy = ConvertType(E->getType());
+    return Builder.CreateIntrinsic(
+        RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
+        ArrayRef<Value *>{IndexOp});
+  }
   case Builtin::BI__builtin_hlsl_all: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     return Builder.CreateIntrinsic(
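The intrinsic returned by getNonUniformResourceIndexIntrinsic() is target-dependent; per the new test at the bottom of this commit, the builtin call lowers to one of:

    %r = call i32 @llvm.dx.resource.nonuniformindex(i32 %idx)  ; DXIL targets
    %r = call i32 @llvm.spv.resource.nonuniformindex(i32 %idx) ; SPIR-V targets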

clang/lib/CodeGen/CGHLSLRuntime.h (2 additions, 0 deletions)

@@ -129,6 +129,8 @@ class CGHLSLRuntime {
                                    resource_handlefrombinding)
   GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding,
                                    resource_handlefromimplicitbinding)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex,
+                                   resource_nonuniformindex)
   GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
   GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
                                    group_memory_barrier_with_group_sync)
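The macro body is outside this diff, so the following is an assumed sketch of what the new entry provides, inferred from the getter name used in CGHLSLBuiltins.cpp above and the intrinsics checked in the new test:

    // Assumed shape: a per-target intrinsic getter used by codegen.
    llvm::Intrinsic::ID getNonUniformResourceIndexIntrinsic();
    // Resolves to llvm::Intrinsic::dx_resource_nonuniformindex for DXIL and
    // llvm::Intrinsic::spv_resource_nonuniformindex for SPIR-V.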

clang/lib/Headers/hlsl/hlsl_intrinsics.h (24 additions, 0 deletions)

@@ -422,6 +422,30 @@ constexpr int4 D3DCOLORtoUBYTE4(float4 V) {
   return __detail::d3d_color_to_ubyte4_impl(V);
 }
 
+//===----------------------------------------------------------------------===//
+// NonUniformResourceIndex builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn uint NonUniformResourceIndex(uint Index)
+/// \brief A compiler hint to indicate that a resource index varies across
+/// threads within a wave (i.e., it is non-uniform).
+/// \param Index [in] Resource array index
+///
+/// The return value is the \a Index parameter.
+///
+/// When indexing into an array of shader resources (e.g., textures, buffers),
+/// some GPU hardware and drivers require the compiler to know whether the
+/// index is uniform (same for all threads) or non-uniform (varies per thread).
+///
+/// Using NonUniformResourceIndex explicitly marks an index as non-uniform,
+/// disabling certain assumptions or optimizations that could lead to incorrect
+/// behavior when dynamically accessing resource arrays with non-uniform
+/// indices.
+
+constexpr uint32_t NonUniformResourceIndex(uint32_t Index) {
+  return __builtin_hlsl_resource_nonuniformindex(Index);
+}
+
 //===----------------------------------------------------------------------===//
 // reflect builtin
 //===----------------------------------------------------------------------===//
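A short HLSL usage sketch, mirroring the new test at the end of this commit (names are illustrative):

    RWBuffer<float> Buffers[10];

    [numthreads(4, 1, 1)]
    void main(uint GI : SV_GroupID) {
      // Each thread may select a different buffer, so the index must be
      // marked non-uniform before it is used to index the resource array.
      float v = Buffers[NonUniformResourceIndex(GI)][0];
      Buffers[0][0] = v;
    }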

clang/lib/Sema/SemaChecking.cpp (13 additions, 1 deletion)

@@ -13043,7 +13043,19 @@ static void AnalyzeImplicitConversions(
 
     // Skip past explicit casts.
     if (auto *CE = dyn_cast<ExplicitCastExpr>(E)) {
-      E = CE->getSubExpr()->IgnoreParenImpCasts();
+      E = CE->getSubExpr();
+      // In the special case of a C++ function-style cast with braces,
+      // CXXFunctionalCastExpr has an InitListExpr as its direct child with a
+      // single initializer. This InitListExpr effectively belongs to the cast
+      // itself, so we skip it too. Specifically, this is needed to silence
+      // -Wdouble-promotion.
+      if (isa<CXXFunctionalCastExpr>(CE)) {
+        if (auto *InitListE = dyn_cast<InitListExpr>(E)) {
+          if (InitListE->getNumInits() == 1) {
+            E = InitListE->getInit(0);
+          }
+        }
+      }
+      E = E->IgnoreParenImpCasts();
       if (!CE->getType()->isVoidType() && E->getType()->isAtomicType())
         S.Diag(E->getBeginLoc(), diag::warn_atomic_implicit_seq_cst);
       WorkList.push_back({E, CC, IsListInit});
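For orientation, the AST shape the new branch matches, e.g. for double{f} with a float f (our sketch of clang's dump, abbreviated):

    CXXFunctionalCastExpr 'double' functional cast to double <NoOp>
    `-InitListExpr 'double'
      `-ImplicitCastExpr 'double' <FloatingCast>
        `-DeclRefExpr 'float' lvalue Var 'f'

Unwrapping the single-initializer InitListExpr lets the existing skip-explicit-casts logic treat the promotion as explicitly requested, which is what silences -Wdouble-promotion here.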

clang/test/CIR/CodeGen/complex.cpp (103 additions, 1 deletion)

@@ -1092,4 +1092,106 @@ void imag_on_non_glvalue() {
 // OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
 // OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
 // OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: store float %[[A_IMAG]], ptr %[[B_ADDR]], align 4
+// OGCG: store float %[[A_IMAG]], ptr %[[B_ADDR]], align 4
+
+void real_on_scalar_glvalue() {
+  float a;
+  float b = __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load float, ptr %[[A_ADDR]], align 4
+// LLVM: store float %[[TMP_A]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[TMP_A:.*]] = load float, ptr %[[A_ADDR]], align 4
+// OGCG: store float %[[TMP_A]], ptr %[[B_ADDR]], align 4
+
+void imag_on_scalar_glvalue() {
+  float a;
+  float b = __imag__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CIR: cir.store{{.*}} %[[CONST_ZERO]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+void real_on_scalar_with_type_promotion() {
+  _Float16 a;
+  _Float16 b = __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.f16>, !cir.f16
+// CIR: %[[TMP_A_F32:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.f16), !cir.float
+// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A_F32]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[TMP_A_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[TMP_A:.*]] = load half, ptr %[[A_ADDR]], align 2
+// LLVM: %[[TMP_A_F32:.*]] = fpext half %[[TMP_A]] to float
+// LLVM: %[[TMP_A_F16:.*]] = fptrunc float %[[TMP_A_F32]] to half
+// LLVM: store half %[[TMP_A_F16]], ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[TMP_A:.*]] = load half, ptr %[[A_ADDR]], align 2
+// OGCG: %[[TMP_A_F32:.*]] = fpext half %[[TMP_A]] to float
+// OGCG: %[[TMP_A_F16:.*]] = fptrunc float %[[TMP_A_F32]] to half
+// OGCG: store half %[[TMP_A_F16]], ptr %[[B_ADDR]], align 2
+
+void imag_on_scalar_with_type_promotion() {
+  _Float16 a;
+  _Float16 b = __imag__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CIR: %[[CONST_ZERO_F16:.*]] = cir.cast(floating, %[[CONST_ZERO]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[CONST_ZERO_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: store half 0xH0000, ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: store half 0xH0000, ptr %[[B_ADDR]], align 2
+
+void imag_on_const_scalar() {
+  float a;
+  float b = __imag__ 1.0f;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[CONST_ONE:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+// CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CIR: cir.store{{.*}} %[[CONST_ZERO]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
New test file (path not shown on this page) (38 additions, 0 deletions)

@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -emit-llvm -disable-llvm-passes -o - %s \
+// RUN:   | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL
+// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -emit-llvm -disable-llvm-passes -o - %s \
+// RUN:   | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPV
+
+RWBuffer<float> A[10];
+
+[numthreads(4,1,1)]
+void main(uint GI : SV_GroupID) {
+  // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr
+  // CHECK: %[[NURI_1:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[GI]])
+  // CHECK: call void @hlsl::RWBuffer<float>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+  // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[NURI_1]], ptr noundef @A.str)
+  float a = A[NonUniformResourceIndex(GI)][0];
+
+  // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr
+  // CHECK: %[[ADD:.*]] = add i32 %[[GI]], 1
+  // CHECK: %[[NURI_2:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[ADD]])
+  // CHECK: %[[MOD:.*]] = urem i32 %[[NURI_2]], 10
+  // CHECK: call void @hlsl::RWBuffer<float>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+  // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[MOD]], ptr noundef @A.str)
+  float b = A[NonUniformResourceIndex(GI + 1) % 10][0];
+
+  // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr
+  // CHECK: %[[NURI_3:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[GI]])
+  // CHECK: %[[MUL:.*]] = mul i32 3, %[[NURI_3]]
+  // CHECK: %[[ADD2:.*]] = add i32 10, %[[MUL]]
+  // CHECK: call void @hlsl::RWBuffer<float>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+  // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[ADD2]], ptr noundef @A.str)
+  float c = A[10 + 3 * NonUniformResourceIndex(GI)][0];
+  A[0][0] = a + b + c;
+}
+
+// CHECK: define {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %Index)
+// CHECK: %[[INDEX1:.*]] = load i32, ptr %Index.addr, align 4
+// DXIL: %[[INDEX2:.*]] = call i32 @llvm.dx.resource.nonuniformindex(i32 %[[INDEX1]])
+// SPV: %[[INDEX2:.*]] = call i32 @llvm.spv.resource.nonuniformindex(i32 %[[INDEX1]])
+// CHECK: ret i32 %[[INDEX2]]
