
Commit a67cbb3 (2 parents: 9690e48 + 6e904e8)

fix some comments

Created using spr 1.3.7

File tree: 68 files changed (+3766, −686 lines)

Note: large commits have some content hidden by default; only a subset of the 68 changed files appears below.

clang/docs/ReleaseNotes.rst (1 addition, 0 deletions)

@@ -422,6 +422,7 @@ Bug Fixes to C++ Support
   ``__builtin_addressof``, and related issues with builtin arguments. (#GH154034)
 - Fix an assertion failure when taking the address on a non-type template parameter argument of
   object type. (#GH151531)
+- Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409).
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
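A minimal sketch of the pattern this note covers (names are illustrative):

    float f = 1.0f;
    double d1 = f;         // warns under -Wdouble-promotion: implicit float-to-double promotion
    double d2 = double{f}; // list-initialization asks for the promotion explicitly;
                           // no longer warned after this change (#GH33409)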

clang/include/clang/Basic/Builtins.td (6 additions, 0 deletions)

@@ -4945,6 +4945,12 @@ def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLResourceNonUniformIndex : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_resource_nonuniformindex"];
+  let Attributes = [NoThrow];
+  let Prototype = "uint32_t(uint32_t)";
+}
+
 def HLSLAll : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_all"];
   let Attributes = [NoThrow, Const];
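This entry registers __builtin_hlsl_resource_nonuniformindex only under the HLSL language mode, with signature uint32_t(uint32_t). Unlike __builtin_hlsl_all just below it, it is not marked Const; our reading is that this keeps the hint from being folded away as a pure computation. A hypothetical direct call (user code would normally go through the NonUniformResourceIndex wrapper added in hlsl_intrinsics.h later in this commit):

    uint32_t j = __builtin_hlsl_resource_nonuniformindex(i); // `i` is an illustrative uint32_t index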

clang/lib/Analysis/FlowSensitive/Transfer.cpp (6 additions, 1 deletion)

@@ -657,7 +657,12 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     if (LocSrc == nullptr || LocDst == nullptr)
       return;
 
-    copyRecord(*LocSrc, *LocDst, Env);
+    // If the destination object here is of a derived class, `Arg0` may be a
+    // cast of that object to a base class, and the source object may be of a
+    // sibling derived class. To handle these cases, ensure we are copying
+    // only the fields for `Arg0`'s type, not the type of the underlying
+    // `RecordStorageLocation`.
+    copyRecord(*LocSrc, *LocDst, Env, Arg0->getType());
 
     // The assignment operator can have an arbitrary return type. We model the
     // return value only if the return type is the same as or a base class of
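The kind of code the new comment describes, as we read it (a sketch; the types and names are ours, not from the diff):

    struct Base { int b; };
    struct Derived1 : Base { int d1; };
    struct Derived2 : Base { int d2; };

    void f(Derived1 &dst, Derived2 &src) {
      // `Arg0` is the base-class lvalue; the underlying storage locations are
      // the full Derived1/Derived2 objects. Copying by Arg0's type (Base)
      // copies only `b`, the one field the two derived types actually share.
      static_cast<Base &>(dst) = static_cast<Base &>(src);
    }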

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp (16 additions, 4 deletions)

@@ -2140,11 +2140,23 @@ mlir::Value ScalarExprEmitter::VisitRealImag(const UnaryOperator *e,
                        : builder.createComplexImag(loc, complex);
   }
 
-  // __real or __imag on a scalar returns zero. Emit the subexpr to ensure side
+  if (e->getOpcode() == UO_Real) {
+    return promotionTy.isNull() ? Visit(op)
+                                : cgf.emitPromotedScalarExpr(op, promotionTy);
+  }
+
+  // __imag on a scalar returns zero. Emit the subexpr to ensure side
   // effects are evaluated, but not the actual value.
-  cgf.cgm.errorNYI(e->getSourceRange(),
-                   "VisitRealImag __real or __imag on a scalar");
-  return {};
+  if (op->isGLValue())
+    cgf.emitLValue(op);
+  else if (!promotionTy.isNull())
+    cgf.emitPromotedScalarExpr(op, promotionTy);
+  else
+    cgf.emitScalarExpr(op);
+
+  mlir::Type valueTy =
+      cgf.convertType(promotionTy.isNull() ? e->getType() : promotionTy);
+  return builder.getNullValue(valueTy, loc);
 }
 
 /// Return the size or alignment of the type of argument of the sizeof
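In source terms, the GNU-extension semantics this implements for non-complex operands (a sketch):

    float a = 2.0f;
    float r = __real__ a; // just `a`: the real part of a scalar is the value itself
    float i = __imag__ a; // constant zero; `a` is still emitted for its side effects

With a promoted type such as _Float16, the operand (or the zero) is materialized at the promoted type and truncated back, as the new complex.cpp tests below check.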

clang/lib/CodeGen/CGHLSLBuiltins.cpp (7 additions, 0 deletions)

@@ -352,6 +352,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
     return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
   }
+  case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
+    Value *IndexOp = EmitScalarExpr(E->getArg(0));
+    llvm::Type *RetTy = ConvertType(E->getType());
+    return Builder.CreateIntrinsic(
+        RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
+        ArrayRef<Value *>{IndexOp});
+  }
   case Builtin::BI__builtin_hlsl_all: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     return Builder.CreateIntrinsic(
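The intrinsic returned by getNonUniformResourceIndexIntrinsic() is target-dependent; per the new test at the bottom of this commit, the builtin call lowers to one of:

    %r = call i32 @llvm.dx.resource.nonuniformindex(i32 %idx)  ; DXIL targets
    %r = call i32 @llvm.spv.resource.nonuniformindex(i32 %idx) ; SPIR-V targets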

clang/lib/CodeGen/CGHLSLRuntime.h (2 additions, 0 deletions)

@@ -129,6 +129,8 @@ class CGHLSLRuntime {
                                    resource_handlefrombinding)
   GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding,
                                    resource_handlefromimplicitbinding)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex,
+                                   resource_nonuniformindex)
   GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
   GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
                                    group_memory_barrier_with_group_sync)
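The macro body is outside this diff, so the following is an assumed sketch of what the new entry provides, inferred from the getter name used in CGHLSLBuiltins.cpp above and the intrinsics checked in the new test:

    // Assumed shape: a per-target intrinsic getter used by codegen.
    llvm::Intrinsic::ID getNonUniformResourceIndexIntrinsic();
    // Resolves to llvm::Intrinsic::dx_resource_nonuniformindex for DXIL and
    // llvm::Intrinsic::spv_resource_nonuniformindex for SPIR-V.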

clang/lib/Headers/hlsl/hlsl_intrinsics.h (24 additions, 0 deletions)

@@ -422,6 +422,30 @@ constexpr int4 D3DCOLORtoUBYTE4(float4 V) {
   return __detail::d3d_color_to_ubyte4_impl(V);
 }
 
+//===----------------------------------------------------------------------===//
+// NonUniformResourceIndex builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn uint NonUniformResourceIndex(uint Index)
+/// \brief A compiler hint to indicate that a resource index varies across
+/// threads within a wave (i.e., it is non-uniform).
+/// \param Index [in] Resource array index
+///
+/// The return value is the \a Index parameter.
+///
+/// When indexing into an array of shader resources (e.g., textures, buffers),
+/// some GPU hardware and drivers require the compiler to know whether the
+/// index is uniform (same for all threads) or non-uniform (varies per thread).
+///
+/// Using NonUniformResourceIndex explicitly marks an index as non-uniform,
+/// disabling certain assumptions or optimizations that could lead to incorrect
+/// behavior when dynamically accessing resource arrays with non-uniform
+/// indices.
+
+constexpr uint32_t NonUniformResourceIndex(uint32_t Index) {
+  return __builtin_hlsl_resource_nonuniformindex(Index);
+}
+
 //===----------------------------------------------------------------------===//
 // reflect builtin
 //===----------------------------------------------------------------------===//
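A short HLSL usage sketch, mirroring the new test at the end of this commit (names are illustrative):

    RWBuffer<float> Buffers[10];

    [numthreads(4, 1, 1)]
    void main(uint GI : SV_GroupID) {
      // Each thread may select a different buffer, so the index must be
      // marked non-uniform before it is used to index the resource array.
      float v = Buffers[NonUniformResourceIndex(GI)][0];
      Buffers[0][0] = v;
    }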

clang/lib/Sema/SemaChecking.cpp (13 additions, 1 deletion)

@@ -13043,7 +13043,19 @@ static void AnalyzeImplicitConversions(
 
     // Skip past explicit casts.
     if (auto *CE = dyn_cast<ExplicitCastExpr>(E)) {
-      E = CE->getSubExpr()->IgnoreParenImpCasts();
+      E = CE->getSubExpr();
+      // In the special case of a C++ function-style cast with braces,
+      // CXXFunctionalCastExpr has an InitListExpr as its direct child with a
+      // single initializer. This InitListExpr effectively belongs to the cast
+      // itself, so we skip it too. Specifically, this is needed to silence
+      // -Wdouble-promotion.
+      if (isa<CXXFunctionalCastExpr>(CE)) {
+        if (auto *InitListE = dyn_cast<InitListExpr>(E)) {
+          if (InitListE->getNumInits() == 1) {
+            E = InitListE->getInit(0);
+          }
+        }
+      }
+      E = E->IgnoreParenImpCasts();
       if (!CE->getType()->isVoidType() && E->getType()->isAtomicType())
         S.Diag(E->getBeginLoc(), diag::warn_atomic_implicit_seq_cst);
       WorkList.push_back({E, CC, IsListInit});
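For orientation, the AST shape the new branch matches, e.g. for double{f} with a float f (our sketch of clang's dump, abbreviated):

    CXXFunctionalCastExpr 'double' functional cast to double <NoOp>
    `-InitListExpr 'double'
      `-ImplicitCastExpr 'double' <FloatingCast>
        `-DeclRefExpr 'float' lvalue Var 'f'

Unwrapping the single-initializer InitListExpr lets the existing skip-explicit-casts logic treat the promotion as explicitly requested, which is what silences -Wdouble-promotion here.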

clang/test/CIR/CodeGen/complex.cpp (103 additions, 1 deletion)

@@ -1092,4 +1092,106 @@ void imag_on_non_glvalue() {
 // OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
 // OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
 // OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: store float %[[A_IMAG]], ptr %[[B_ADDR]], align 4
+// OGCG: store float %[[A_IMAG]], ptr %[[B_ADDR]], align 4
+
+void real_on_scalar_glvalue() {
+  float a;
+  float b = __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load float, ptr %[[A_ADDR]], align 4
+// LLVM: store float %[[TMP_A]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[TMP_A:.*]] = load float, ptr %[[A_ADDR]], align 4
+// OGCG: store float %[[TMP_A]], ptr %[[B_ADDR]], align 4
+
+void imag_on_scalar_glvalue() {
+  float a;
+  float b = __imag__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CIR: cir.store{{.*}} %[[CONST_ZERO]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+void real_on_scalar_with_type_promotion() {
+  _Float16 a;
+  _Float16 b = __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.f16>, !cir.f16
+// CIR: %[[TMP_A_F32:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.f16), !cir.float
+// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A_F32]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[TMP_A_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[TMP_A:.*]] = load half, ptr %[[A_ADDR]], align 2
+// LLVM: %[[TMP_A_F32:.*]] = fpext half %[[TMP_A]] to float
+// LLVM: %[[TMP_A_F16:.*]] = fptrunc float %[[TMP_A_F32]] to half
+// LLVM: store half %[[TMP_A_F16]], ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[TMP_A:.*]] = load half, ptr %[[A_ADDR]], align 2
+// OGCG: %[[TMP_A_F32:.*]] = fpext half %[[TMP_A]] to float
+// OGCG: %[[TMP_A_F16:.*]] = fptrunc float %[[TMP_A_F32]] to half
+// OGCG: store half %[[TMP_A_F16]], ptr %[[B_ADDR]], align 2
+
+void imag_on_scalar_with_type_promotion() {
+  _Float16 a;
+  _Float16 b = __imag__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CIR: %[[CONST_ZERO_F16:.*]] = cir.cast(floating, %[[CONST_ZERO]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[CONST_ZERO_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: store half 0xH0000, ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: store half 0xH0000, ptr %[[B_ADDR]], align 2
+
+void imag_on_const_scalar() {
+  float a;
+  float b = __imag__ 1.0f;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[CONST_ONE:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+// CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CIR: cir.store{{.*}} %[[CONST_ZERO]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
New test file (path not shown on this page) (38 additions, 0 deletions)

@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -emit-llvm -disable-llvm-passes -o - %s \
+// RUN:   | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL
+// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -emit-llvm -disable-llvm-passes -o - %s \
+// RUN:   | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPV
+
+RWBuffer<float> A[10];
+
+[numthreads(4,1,1)]
+void main(uint GI : SV_GroupID) {
+  // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr
+  // CHECK: %[[NURI_1:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[GI]])
+  // CHECK: call void @hlsl::RWBuffer<float>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+  // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[NURI_1]], ptr noundef @A.str)
+  float a = A[NonUniformResourceIndex(GI)][0];
+
+  // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr
+  // CHECK: %[[ADD:.*]] = add i32 %[[GI]], 1
+  // CHECK: %[[NURI_2:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[ADD]])
+  // CHECK: %[[MOD:.*]] = urem i32 %[[NURI_2]], 10
+  // CHECK: call void @hlsl::RWBuffer<float>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+  // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[MOD]], ptr noundef @A.str)
+  float b = A[NonUniformResourceIndex(GI + 1) % 10][0];
+
+  // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr
+  // CHECK: %[[NURI_3:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[GI]])
+  // CHECK: %[[MUL:.*]] = mul i32 3, %[[NURI_3]]
+  // CHECK: %[[ADD2:.*]] = add i32 10, %[[MUL]]
+  // CHECK: call void @hlsl::RWBuffer<float>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+  // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[ADD2]], ptr noundef @A.str)
+  float c = A[10 + 3 * NonUniformResourceIndex(GI)][0];
+  A[0][0] = a + b + c;
+}
+
+// CHECK: define {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %Index)
+// CHECK: %[[INDEX1:.*]] = load i32, ptr %Index.addr, align 4
+// DXIL: %[[INDEX2:.*]] = call i32 @llvm.dx.resource.nonuniformindex(i32 %[[INDEX1]])
+// SPV: %[[INDEX2:.*]] = call i32 @llvm.spv.resource.nonuniformindex(i32 %[[INDEX1]])
+// CHECK: ret i32 %[[INDEX2]]
