-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[HLSL] Codegen for indexing of sub-arrays of multi-dimensional resource arrays #154248
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
109 changes: 109 additions & 0 deletions
109
clang/test/CodeGenHLSL/resources/res-array-global-subarray-many.hlsl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \ | ||
// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s | ||
|
||
// CHECK: @[[BufA:.*]] = private unnamed_addr constant [2 x i8] c"A\00", align 1 | ||
|
||
RWBuffer<float> A[5][4][3][2] : register(u10, space2); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor nit: I think 'space2' is meaningless for this test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The value is passed into the constructor call. |
||
RWStructuredBuffer<float> Out; | ||
|
||
// CHECK: define {{.*}} float @_Z3fooA3_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %Arr) | ||
// CHECK-NEXT: entry: | ||
float foo(RWBuffer<float> Arr[3][2]) { | ||
hekota marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// CHECK-NEXT: %[[Arr_1_Ptr:.*]] = getelementptr inbounds [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %Arr, i32 0, i32 1 | ||
// CHECK-NEXT: %[[Arr_1_0_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %[[Arr_1_Ptr]], i32 0, i32 0 | ||
// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Arr_1_0_Ptr]], i32 noundef 0) | ||
// CHECK-NEXT: %[[Value:.*]] = load float, ptr %[[BufPtr]], align 4 | ||
// CHECK-NEXT: ret float %[[Value]] | ||
return Arr[1][0][0]; | ||
} | ||
|
||
// NOTE: | ||
// - _ZN4hlsl8RWBufferIfEC1EjjijPKc is the constructor call for explicit binding | ||
// (has "jjij" in the mangled name) and the arguments are (register, space, range_size, index, name). | ||
// - _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float> | ||
|
||
// CHECK: define internal void @_Z4mainj(i32 noundef %GI) | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: %[[GI_alloca:.*]] = alloca i32, align 4 | ||
// CHECK-NEXT: %Sub = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 | ||
// CHECK-NEXT: %[[Tmp0:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 | ||
// CHECK-NEXT: %a = alloca float, align 4 | ||
// CHECK-NEXT: %b = alloca float, align 4 | ||
// CHECK-NEXT: %[[Tmp1:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 | ||
// CHECK-NEXT: %[[Tmp2:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 | ||
// CHECK-NEXT: store i32 %GI, ptr %[[GI_alloca]], align 4 | ||
[numthreads(4,1,1)] | ||
void main(uint GI : SV_GroupThreadID) { | ||
// Codegen for "A[4][1]" - create local array [[Tmp0]] of size 3 x 2 and initialize | ||
// each element by a call to the resource constructor | ||
// The resource index for A[4][1][0][0] is 102 = 4 * (4 * 3 * 2) + 1 * (3 * 2) | ||
// (index in the resource array as if it was flattened) | ||
// CHECK-NEXT: %[[Ptr_Tmp0_0_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 0, i32 0 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 102, ptr noundef @[[BufA]]) | ||
// CHECK-NEXT: %[[Ptr_Tmp0_0_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 0, i32 1 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 103, ptr noundef @[[BufA]]) | ||
// CHECK-NEXT: %[[Ptr_Tmp0_1_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 1, i32 0 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 104, ptr noundef @[[BufA]]) | ||
// CHECK-NEXT: %[[Ptr_Tmp0_1_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 1, i32 1 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 105, ptr noundef @[[BufA]]) | ||
// CHECK-NEXT: %[[Ptr_Tmp0_2_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 2, i32 0 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_2_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 106, ptr noundef @[[BufA]]) | ||
// CHECK-NEXT: %[[Ptr_Tmp0_2_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 2, i32 1 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_2_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 107, ptr noundef @[[BufA]]) | ||
// After this Tmp0 values are copied to %Sub using the standard array loop initializaion | ||
// (generated from ArrayInitLoopExpr AST node) | ||
RWBuffer<float> Sub[3][2] = A[4][1]; | ||
|
||
// CHECK: %[[Ptr_Sub_2:.*]] = getelementptr inbounds [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %Sub, i32 0, i32 2 | ||
// CHECK: %[[Ptr_Sub_2_1:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %[[Ptr_Sub_2]], i32 0, i32 1 | ||
// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Ptr_Sub_2_1]], i32 noundef 0) | ||
// CHECK-NEXT: %[[Sub_2_1_0_Value:.*]] = load float, ptr %[[BufPtr]], align 4 | ||
// CHECK-NEXT: store float %[[Sub_2_1_0_Value]], ptr %a, align 4 | ||
float a = Sub[2][1][0]; | ||
|
||
// Codegen for "foo(A[2][GI])" - create local array [[Tmp2]] of size 3 x 2 and initialize | ||
// each element by a call to the resource constructor with dynamic index, and then | ||
// copy-in the array as an argument of "foo" | ||
|
||
// Calculate the resource index for A[2][GI][0][0] (index in the resource array as if it was flattened) | ||
// The index is 2 * (4 * 3 * 2) + GI * (3 * 2) = 48 + GI * 6 | ||
// CHECK: %[[GI:.*]] = load i32, ptr %[[GI_alloca]], align 4 | ||
// CHECK-NEXT: %[[Index_A_2_GI_Tmp:.*]] = mul i32 %[[GI]], 6 | ||
// CHECK-NEXT: %[[Index_A_2_GI_0_0:.*]] = add i32 %[[Index_A_2_GI_Tmp]], 48 | ||
|
||
// A[2][GI][0][0] | ||
// CHECK-NEXT: %[[Ptr_Tmp2_0_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 0, i32 0 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_0_0]], ptr noundef @[[BufA]]) | ||
|
||
// A[2][GI][0][1] | ||
// CHECK-NEXT: %[[Index_A_2_GI_0_1:.*]] = add i32 %[[Index_A_2_GI_0_0]], 1 | ||
// CHECK-NEXT: %[[Ptr_Tmp2_0_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 0, i32 1 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_0_1]], ptr noundef @[[BufA]]) | ||
|
||
// A[2][GI][1][0] | ||
// CHECK-NEXT: %[[Index_A_2_GI_1_0:.*]] = add i32 %[[Index_A_2_GI_0_1]], 1 | ||
// CHECK-NEXT: %[[Ptr_Tmp2_1_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 1, i32 0 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_1_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_1_0]], ptr noundef @[[BufA]]) | ||
|
||
// A[2][GI][1][1] | ||
// CHECK-NEXT: %[[Index_A_2_GI_1_1:.*]] = add i32 %[[Index_A_2_GI_1_0]], 1 | ||
// CHECK-NEXT: %[[Ptr_Tmp2_1_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 1, i32 1 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_1_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_1_1]], ptr noundef @[[BufA]]) | ||
|
||
// A[2][GI][2][0] | ||
// CHECK-NEXT: %[[Index_A_2_GI_2_0:.*]] = add i32 %[[Index_A_2_GI_1_1]], 1 | ||
// CHECK-NEXT: %[[Ptr_Tmp2_2_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 2, i32 0 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_2_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_2_0]], ptr noundef @[[BufA]]) | ||
|
||
// A[2][GI][2][1] | ||
// CHECK-NEXT: %[[Index_A_2_GI_2_1:.*]] = add i32 %[[Index_A_2_GI_2_0]], 1 | ||
// CHECK-NEXT: %[[Ptr_Tmp2_2_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 2, i32 1 | ||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_2_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_2_1]], ptr noundef @[[BufA]]) | ||
|
||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp1]], ptr align 4 %[[Tmp2]], i32 24, i1 false) | ||
// CHECK-NEXT: %[[FooReturned:.*]] = call {{.*}} float @_Z3fooA3_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %[[Tmp1]]) | ||
// CHECK-NEXT: store float %[[FooReturned]], ptr %b, align 4 | ||
float b = foo(A[2][GI]); | ||
|
||
Out[0] = a + b; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For learning purposes, when would the result of getArrayDecl ever be expected to be null? That seems weird to me in this case.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may happen when the
ArraySubscriptExpr
refers to anOpaqueValueExpr
as part of anArrayInitLoopExpr
AST construct.See https://godbolt.org/z/MzKsf8Kh5.
The
ArrayInitLoopExpr
AST node is used for array initialization from another array (see https://clang.llvm.org/doxygen/classclang_1_1ArrayInitLoopExpr.html#details) .Notice how the first and the third
ArraySubscriptExpr
are the same (same pointer value)? We only do HLSL codegen on the firstArraySubscriptExpr
where we create a local resource array with 2 elements. The secondArraySubscriptExpr
is wheregetArrayDecl
will be null because we hit theOpaqueValueExpr
when looking for the decl. It will then be handled by Clang as a local array access on top of the local resource array that we created earlier.