Skip to content

Commit 93543b7

Browse files
authored
Fix copy-in/copy-out parameter passing (microsoft#4900)
This change fixes HLSL's copy-in/copy-out parameter passing. Before this change we skip emitting copies for all local variable parameters, which results in code correctness issues. With this change, we do a poor-man's alias analysis walking each local variable back to either an argument or an alloca. If it tracks back to an argument marked noalias we can skip the copy. If it tracks to an alloca, we skip the copy for the first parameter that tracks to that alloca, and emit the copies for any subsequent parameters. This change does have one side-effect that I don't understand. It results in changing the ordering of some of the disassembly output. I suspect that is caused by something in the disassembly being order-dependent but haven't investigated fully.
1 parent 88a2888 commit 93543b7

File tree

9 files changed

+187
-51
lines changed

9 files changed

+187
-51
lines changed

tools/clang/lib/CodeGen/CGHLSLMS.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5896,6 +5896,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
58965896
const std::function<void(const VarDecl *, llvm::Value *)> &TmpArgMap) {
58975897
// Special case: skip first argument of CXXOperatorCall (it is "this").
58985898
unsigned ArgsToSkip = isa<CXXOperatorCallExpr>(E) ? 1 : 0;
5899+
llvm::SmallSet<llvm::Value*, 8> ArgVals;
58995900
for (uint32_t i = 0; i < FD->getNumParams(); i++) {
59005901
const ParmVarDecl *Param = FD->getParamDecl(i);
59015902
const Expr *Arg = E->getArg(i+ArgsToSkip);
@@ -6018,21 +6019,28 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
60186019
// copy to let the lower not happen on argument when calle is noinline
60196020
// or extern functions. Will do it in HLLegalizeParameter after known
60206021
// which functions are extern but before inline.
6021-
bool bConstGlobal = false;
60226022
Value *Ptr = argAddr;
60236023
while (GEPOperator *GEP = dyn_cast_or_null<GEPOperator>(Ptr)) {
60246024
Ptr = GEP->getPointerOperand();
60256025
}
6026-
if (GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(Ptr)) {
6027-
bConstGlobal = m_ConstVarAnnotationMap.count(GV) | GV->isConstant();
6028-
}
60296026
// Skip copy-in copy-out when safe.
60306027
// The unsafe case will be global variable alias with parameter.
60316028
// Then global variable is updated in the function, the parameter will
60326029
// be updated silently. For non global variable or constant global
60336030
// variable, it should be safe.
6034-
if (argAddr &&
6035-
(isa<AllocaInst>(Ptr) || isa<Argument>(Ptr) || bConstGlobal)) {
6031+
bool SafeToSkip = false;
6032+
if (GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(Ptr)) {
6033+
SafeToSkip = ParamTy.isConstQualified() && (m_ConstVarAnnotationMap.count(GV) > 0 || GV->isConstant());
6034+
}
6035+
if (Ptr) {
6036+
if (isa<AllocaInst>(Ptr) && 0 == ArgVals.count(Ptr))
6037+
SafeToSkip = true;
6038+
else if (const auto *A = dyn_cast<Argument>(Ptr))
6039+
SafeToSkip = A->hasNoAliasAttr() && 0 == ArgVals.count(Ptr);
6040+
}
6041+
6042+
if (argAddr && SafeToSkip) {
6043+
ArgVals.insert(Ptr);
60366044
llvm::Type *ToTy = CGF.ConvertType(ParamTy.getNonReferenceType());
60376045
if (argAddr->getType()->getPointerElementType() == ToTy &&
60386046
// Check clang Type for case like int cast to unsigned.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// RUN: %dxc -T lib_6_4 -fcgl %s | FileCheck %s
2+
3+
4+
struct Pup {
5+
float X;
6+
};
7+
8+
void CalledFunction(inout float F, inout Pup P) {
9+
F = 4.0;
10+
P.X = 5.0;
11+
}
12+
13+
void fn() {
14+
float X;
15+
Pup P;
16+
17+
CalledFunction(X, P);
18+
CalledFunction(P.X, P);
19+
}
20+
21+
// CHECK: [[TmpP:%[0-9A-Z]+]] = alloca %struct.Pup
22+
23+
// CHECK: [[X:%[0-9A-Z]+]] = alloca float, align 4
24+
// CHECK: [[P:%[0-9A-Z]+]] = alloca %struct.Pup, align 4
25+
// CHECK: call void @"\01?CalledFunction@@YAXAIAMUPup@@@Z"(float* dereferenceable(4) [[X]], %struct.Pup* [[P]])
26+
27+
// CHECK: [[TmpPPtr:%[0-9A-Z]+]] = bitcast %struct.Pup* [[TmpP]] to i8*
28+
// CHECK: [[PPtr:%[0-9A-Z]+]] = bitcast %struct.Pup* [[P]] to i8*
29+
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpPPtr]], i8* [[PPtr]], i64 4, i32 1, i1 false)
30+
31+
// CHECK: [[PDotX:%[0-9A-Z]+]] = getelementptr inbounds %struct.Pup, %struct.Pup* [[P]], i32 0, i32 0
32+
// CHECK: call void @"\01?CalledFunction@@YAXAIAMUPup@@@Z"(float* dereferenceable(4) [[PDotX]], %struct.Pup* [[TmpP]])
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// RUN: %dxc -T lib_6_4 -fcgl %s | FileCheck %s
2+
3+
void CalledFunction(inout float X, inout float Y, inout float Z) {
4+
X = 1.0;
5+
Y = 2.0;
6+
Z = 3.0;
7+
}
8+
9+
void fn() {
10+
float X, Y, Z = 0.0;
11+
CalledFunction(X, Y, Z);
12+
13+
CalledFunction(X, X, Z);
14+
15+
CalledFunction(X, Y, X);
16+
}
17+
18+
// CHECK: define internal void @"\01?fn@@YAXXZ"()
19+
// CHECK: [[Tmp1:%[0-9A-Z]+]] = alloca float
20+
// CHECK: [[Tmp2:%[0-9A-Z]+]] = alloca float
21+
// CHECK: [[X:%[0-9A-Z]+]] = alloca float, align 4
22+
// CHECK: [[Y:%[0-9A-Z]+]] = alloca float, align 4
23+
// CHECK: [[Z:%[0-9A-Z]+]] = alloca float, align 4
24+
25+
// First call has no copy-in/copy out parameters since all parameters are unique.
26+
// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Y]], float* dereferenceable(4) [[Z]])
27+
28+
// Second call, copies X for parameter 2.
29+
// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
30+
// CHECK: store float [[TmpX]], float* [[Tmp2]]
31+
32+
// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Tmp2]], float* dereferenceable(4) [[Z]])
33+
34+
// Second call, saves parameter 2 to X after the call.
35+
// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[Tmp2]]
36+
// CHECK: store float [[TmpX]], float* [[X]]
37+
38+
// The third call copies X for the third parameter.
39+
// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
40+
// CHECK: store float [[TmpX]], float* [[Tmp1]]
41+
42+
// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Y]], float* dereferenceable(4) [[Tmp1]])
43+
44+
// The third call stores parameter 3 to X after the call.
45+
// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[Tmp1]]
46+
// CHECK: store float [[TmpX]], float* [[X]]
47+
48+
void fn2() {
49+
float X = 0.0;
50+
CalledFunction(X, X, X);
51+
}
52+
53+
// CHECK: [[Tmp1:%[0-9A-Z]+]] = alloca float
54+
// CHECK: [[Tmp2:%[0-9A-Z]+]] = alloca float
55+
// CHECK: [[X:%[0-9A-Z]+]] = alloca float, align 4
56+
57+
// X gets copied in for both parameters two and three.
58+
// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
59+
// CHECK: store float [[TmpX]], float* [[Tmp2]]
60+
// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
61+
// CHECK: store float [[TmpX]], float* [[Tmp1]]
62+
63+
// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Tmp2]], float* dereferenceable(4) [[Tmp1]])
64+
65+
// X gets restored from parameter 2 _then_ parameter 3, so the last paramter is
66+
// the final value of X.
67+
// CHECK: [[X2:%[0-9A-Z]+]] = load float, float* [[Tmp2]]
68+
// CHECK: store float [[X2]], float* [[X]]
69+
// CHECK: [[X1:%[0-9A-Z]+]] = load float, float* [[Tmp1]]
70+
// CHECK: store float [[X1]], float* [[X]], align 4
71+
// line:31 col:3

tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/gep_arg_nocopy.hlsl

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
// RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
22

3-
// Make sure no memcpy generated for gep arg.
4-
// CHECK-NOT:memcpy
5-
63
struct ST {
74
float4 a[32];
85
int4 b[32];
@@ -21,4 +18,21 @@ float4 bar(float4 a[32]) {
2118

2219
float4 main() : SV_Target {
2320
return bar(st.a);
24-
}
21+
}
22+
23+
// bar should be called with a copy of st.a.
24+
// CHECK: define <4 x float> @main()
25+
// CHECK: [[a:%[0-9A-Z]+]] = getelementptr inbounds %"$Globals", %"$Globals"* {{%[0-9A-Z]+}}, i32 0, i32 0, i32 0
26+
// CHECK: [[Tmpa:%[0-9A-Z]+]] = alloca [32 x <4 x float>]
27+
// CHECK: [[TmpaPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[Tmpa]] to i8*
28+
// CHECK: [[aPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[a]] to i8*
29+
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpaPtr]], i8* [[aPtr]], i64 512, i32 1, i1 false)
30+
// CHECK: call <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@V1@@Z"([32 x <4 x float>]* [[Tmpa]])
31+
32+
// Bug: Because a isn't marked noalias, we are generating copies for it.
33+
// CHECK: define internal <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@V1@@Z"([32 x <4 x float>]* [[a:%[0-9a]+]]) #1 {
34+
// CHECK: [[Tmpa:%[0-9A-Z]+]] = alloca [32 x <4 x float>]
35+
// CHECK: [[TmpaPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[Tmpa]] to i8*
36+
// CHECK: [[aPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[a]] to i8*
37+
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpaPtr]], i8* [[aPtr]], i64 512, i32 1, i1 false)
38+
// CHECK: call <4 x float> @"\01?foo@@YA?AV?$vector@M$03@@Y0CA@V1@H@Z"([32 x <4 x float>]* [[Tmpa]], i32 {{%[0-9A-Z]+}})
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
2+
3+
struct ST {
4+
float4 a[32];
5+
int4 b[32];
6+
};
7+
8+
ST st;
9+
int ci;
10+
11+
float4 foo(const float4 a[32], int i) {
12+
return a[i];
13+
}
14+
15+
float4 bar(const float4 a[32]) {
16+
return foo(a, ci);
17+
}
18+
19+
float4 main() : SV_Target {
20+
return bar(st.a);
21+
}
22+
23+
// bar should be called with a copy of st.a.
24+
// CHECK: define <4 x float> @main()
25+
// CHECK: [[a:%[0-9A-Z]+]] = getelementptr inbounds %"$Globals", %"$Globals"* {{%[0-9A-Z]+}}, i32 0, i32 0, i32 0
26+
// CHECK: call <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@$$CBV1@@Z"([32 x <4 x float>]* [[a]])
27+
28+
// Bug: Because a isn't marked noalias, we are generating copies for it.
29+
// CHECK: define internal <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@$$CBV1@@Z"([32 x <4 x float>]* [[a:%[0-9a]+]]) #1 {
30+
// CHECK: [[Tmpa:%[0-9A-Z]+]] = alloca [32 x <4 x float>]
31+
// CHECK: [[TmpaPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[Tmpa]] to i8*
32+
// CHECK: [[aPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[a]] to i8*
33+
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpaPtr]], i8* [[aPtr]], i64 512, i32 1, i1 false)
34+
// CHECK: call <4 x float> @"\01?foo@@YA?AV?$vector@M$03@@Y0CA@$$CBV1@H@Z"([32 x <4 x float>]* [[Tmpa]], i32 {{%[0-9A-Z]+}})

tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/not_copy_cbuffer_argument.hlsl

Lines changed: 0 additions & 23 deletions
This file was deleted.

tools/clang/test/HLSLFileCheck/hlsl/resource_binding/res_in_cb1.hlsl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,33 @@
44
//CHECK: tx0.s sampler NA NA S0 s0 1
55
//CHECK: tx1.s sampler NA NA S1 s1 1
66
//CHECK: s sampler NA NA S2 s3 1
7-
//CHECK: tx0.t texture f32 2d T0 t0 1
8-
//CHECK: tx0.t2 texture f32 2d T1 t1 1
9-
//CHECK: tx1.t texture f32 2d T2 t5 1
10-
//CHECK: tx1.t2 texture f32 2d T3 t6 1
7+
//CHECK: tx0.t2 texture f32 2d T0 t1 1
8+
//CHECK: tx0.t texture f32 2d T1 t0 1
9+
//CHECK: tx1.t2 texture f32 2d T2 t6 1
10+
//CHECK: tx1.t texture f32 2d T3 t5 1
1111
//CHECK: x texture f32 2d T4 t3 1
1212

1313
struct LegacyTex
1414
{
15-
Texture2D t;
16-
Texture2D t2;
17-
SamplerState s;
15+
Texture2D t;
16+
Texture2D t2;
17+
SamplerState s;
1818
};
1919

2020
LegacyTex tx0 : register(t0) : register(s0);
2121
LegacyTex tx1 : register(t5) : register(s1);
2222

2323
float4 tex2D(LegacyTex tx, float2 uv)
2424
{
25-
return tx.t.Sample(tx.s,uv) + tx.t2.Sample(tx.s, uv);
25+
return tx.t.Sample(tx.s,uv) + tx.t2.Sample(tx.s, uv);
2626
}
2727

2828
cbuffer n {
29-
Texture2D x: register(t3);
30-
SamplerState s : register(s3);
29+
Texture2D x: register(t3);
30+
SamplerState s : register(s3);
3131
}
3232

3333
float4 main(float2 uv:UV) : SV_Target
3434
{
35-
return tex2D(tx0,uv) + tex2D(tx1,uv) + x.Sample(s,uv);
36-
}
35+
return tex2D(tx0,uv) + tex2D(tx1,uv) + x.Sample(s,uv);
36+
}

tools/clang/test/HLSLFileCheck/hlsl/types/vector/VectorIndexingAsArgument.hlsl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
// CHECK:%[[A5:.*]] = alloca i32
1010
// CHECK:%[[A6:.*]] = alloca i32
1111

12-
// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A1]], i32* nonnull dereferenceable(4) %[[A0]], i32* nonnull dereferenceable(4) %[[A6]])
13-
// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A5]], i32* nonnull dereferenceable(4) %[[A4]], i32* nonnull dereferenceable(4) %[[A6]])
14-
// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A3]], i32* nonnull dereferenceable(4) %[[A2]], i32* nonnull dereferenceable(4) %[[A6]])
12+
// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A0]], i32* nonnull dereferenceable(4) %[[A5]], i32* nonnull dereferenceable(4) %[[A6]])
13+
// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A4]], i32* nonnull dereferenceable(4) %[[A3]], i32* nonnull dereferenceable(4) %[[A6]])
14+
// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A2]], i32* nonnull dereferenceable(4) %[[A1]], i32* nonnull dereferenceable(4) %[[A6]])
1515

1616
struct DimStruct {
1717
uint2 Dims;
@@ -31,4 +31,4 @@ void main() {
3131
(foo((uint(0u)), (SB[1].Dims)[0], (SB[1].Dims)[1], (iMips)));
3232
(foo((uint(0u)), (gs_Dims)[0], (gs_Dims)[1], (iMips)));
3333
SB[2].Dims = gs_Dims;
34-
}
34+
}

tools/clang/test/HLSLFileCheck/shader_targets/library/lib_arg_flatten/lib_empty_struct_arg.hlsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
// Make sure calls with empty struct params are well-behaved
44

5-
// CHECK: define float @"\01?test2@@YAMUT@@@Z"(%struct.T* %t)
5+
// CHECK: define float @"\01?test2@@YAMUT@@@Z"(%struct.T* nocapture readnone %t)
66
// CHECK-NOT:memcpy
77
// CHECK-NOT:load
88
// CHECK-NOT:store
@@ -16,4 +16,4 @@ float test(T t);
1616

1717
float test2(T t): SV_Target {
1818
return test(t);
19-
}
19+
}

0 commit comments

Comments
 (0)