Skip to content

Commit ed9a102

Browse files
authored
[HLSL] Avoid putting the byval attribute on out and inout parameters (#150495)
Fixes #148063 by preventing the ByVal attribute from being placed on out and inout function parameters, which caused the callee's stores to those parameters to be eliminated by the Dead Store Elimination (DSE) pass.
1 parent d9952a7 commit ed9a102

File tree

2 files changed

+28
-20
lines changed

2 files changed

+28
-20
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2852,20 +2852,28 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
28522852
if (AI.getInReg())
28532853
Attrs.addAttribute(llvm::Attribute::InReg);
28542854

2855-
// Depending on the ABI, this may be either a byval or a dead_on_return
2856-
// argument.
2857-
if (AI.getIndirectByVal()) {
2858-
Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
2859-
} else {
2860-
// Add dead_on_return when the object's lifetime ends in the callee.
2861-
// This includes trivially-destructible objects, as well as objects
2862-
// whose destruction / clean-up is carried out within the callee (e.g.,
2863-
// Obj-C ARC-managed structs, MSVC callee-destroyed objects).
2864-
if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
2865-
ParamType->castAs<RecordType>()
2866-
->getDecl()
2867-
->isParamDestroyedInCallee())
2868-
Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
2855+
// HLSL out and inout parameters must not be marked with ByVal or
2856+
// DeadOnReturn attributes because stores to these parameters by the
2857+
// callee are visible to the caller.
2858+
if (auto ParamABI = FI.getExtParameterInfo(ArgNo).getABI();
2859+
ParamABI != ParameterABI::HLSLOut &&
2860+
ParamABI != ParameterABI::HLSLInOut) {
2861+
2862+
// Depending on the ABI, this may be either a byval or a dead_on_return
2863+
// argument.
2864+
if (AI.getIndirectByVal()) {
2865+
Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
2866+
} else {
2867+
// Add dead_on_return when the object's lifetime ends in the callee.
2868+
// This includes trivially-destructible objects, as well as objects
2869+
// whose destruction / clean-up is carried out within the callee
2870+
// (e.g., Obj-C ARC-managed structs, MSVC callee-destroyed objects).
2871+
if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
2872+
ParamType->castAs<RecordType>()
2873+
->getDecl()
2874+
->isParamDestroyedInCallee())
2875+
Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
2876+
}
28692877
}
28702878

28712879
auto *Decl = ParamType->getAsRecordDecl();

clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ void increment(inout int Arr[2]) {
1111
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
1212
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
1313
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
14-
// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
14+
// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
1515
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
1616
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
1717
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -32,7 +32,7 @@ void fn2(out int Arr[2]) {
3232
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
3333
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
3434
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
35-
// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
35+
// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
3636
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
3737
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
3838
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -56,7 +56,7 @@ void nestedCall(inout int Arr[2], uint index) {
5656
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
5757
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
5858
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
59-
// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]], i32 noundef 0)
59+
// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef align 4 [[Tmp]], i32 noundef 0)
6060
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
6161
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 1
6262
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -70,7 +70,7 @@ export int arrayCall3() {
7070
// CHECK-LABEL: outerCall
7171
// CHECK: [[Tmp:%.*]] = alloca [2 x i32], align 4
7272
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 %{{.*}}, i32 8, i1 false)
73-
// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
73+
// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
7474
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 {{.*}}, ptr align 4 [[Tmp]], i32 8, i1 false)
7575
// CHECK-NEXT: ret void
7676
void outerCall(inout int Arr[2]) {
@@ -82,7 +82,7 @@ void outerCall(inout int Arr[2]) {
8282
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
8383
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
8484
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
85-
// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
85+
// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef align 4 [[Tmp]])
8686
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
8787
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
8888
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -110,7 +110,7 @@ void outerCall2(inout int Arr[2]) {
110110
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
111111
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
112112
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
113-
// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
113+
// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
114114
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
115115
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
116116
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4

0 commit comments

Comments
 (0)