Skip to content

Commit 4ae7348

Browse files
authored
[DirectX] Teach DXILResourceAccess about cbuffers (llvm#164554)
This isn't reachable today, but it will come into play once we reorder passes for llvm#147352 and llvm#147351. Note that the `CBufferRowIntrin` helper struct is copied from the `DXILCBufferAccess` pass, but it will be removed from there when we simplify that pass in llvm#147351.
1 parent f63d33d commit 4ae7348

File tree

9 files changed

+761
-10
lines changed

9 files changed

+761
-10
lines changed

llvm/lib/Target/DirectX/DXILResourceAccess.cpp

Lines changed: 146 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "DirectX.h"
1111
#include "llvm/ADT/SetVector.h"
1212
#include "llvm/Analysis/DXILResource.h"
13+
#include "llvm/Frontend/HLSL/HLSLResource.h"
1314
#include "llvm/IR/BasicBlock.h"
1415
#include "llvm/IR/Dominators.h"
1516
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +21,7 @@
2021
#include "llvm/IR/IntrinsicsDirectX.h"
2122
#include "llvm/IR/User.h"
2223
#include "llvm/InitializePasses.h"
24+
#include "llvm/Support/FormatVariadic.h"
2325
#include "llvm/Transforms/Utils/ValueMapper.h"
2426

2527
#define DEBUG_TYPE "dxil-resource-access"
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
4446
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
4547
if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
4648
APInt Scaled = ConstantOffset.udiv(ScalarSize);
47-
return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
49+
return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
4850
}
4951

50-
auto IndexIt = GEP->idx_begin();
51-
assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
52-
"GEP is not indexing through pointer");
53-
++IndexIt;
54-
Value *Offset = *IndexIt;
55-
assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
56-
return Offset;
52+
unsigned NumIndices = GEP->getNumIndices();
53+
54+
// If we have a single index we're indexing into a top level array. This
55+
// generally only happens with cbuffers.
56+
if (NumIndices == 1)
57+
return *GEP->idx_begin();
58+
59+
// If we have two indices, this should be a simple access through a pointer.
60+
if (NumIndices == 2) {
61+
auto IndexIt = GEP->idx_begin();
62+
assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
63+
"GEP is not indexing through pointer");
64+
++IndexIt;
65+
Value *Offset = *IndexIt;
66+
assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
67+
return Offset;
68+
}
69+
70+
llvm_unreachable("Unhandled GEP structure for resource access");
5771
}
5872

5973
static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
@@ -171,6 +185,127 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
171185
LI->replaceAllUsesWith(V);
172186
}
173187

188+
namespace {
189+
/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
///
/// Given a scalar type, the constructor picks the `load.cbufferrow` variant
/// whose result holds elements of that width and records how a row is divided
/// up. In every supported case EltSize * NumElts is 16 bytes.
190+
struct CBufferRowIntrin {
191+
// Intrinsic to call: one of dx_resource_load_cbufferrow_{8,4,2}.
Intrinsic::ID IID;
192+
// Struct type returned by the intrinsic: NumElts members, each of type Ty.
Type *RetTy;
193+
// Size in bytes of one element of the row (2, 4, or 8).
unsigned int EltSize;
194+
// Number of elements returned per row (8, 4, or 2).
unsigned int NumElts;
195+
196+
// Fills in the fields above based on the bit width that DL reports for Ty.
// Ty must already be a scalar type; any width other than 16, 32, or 64 bits
// reaches llvm_unreachable.
CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
197+
assert(Ty == Ty->getScalarType() && "Expected scalar type");
198+
199+
switch (DL.getTypeSizeInBits(Ty)) {
200+
// 16-bit elements: eight per row.
case 16:
201+
IID = Intrinsic::dx_resource_load_cbufferrow_8;
202+
RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
203+
EltSize = 2;
204+
NumElts = 8;
205+
break;
206+
// 32-bit elements: four per row.
case 32:
207+
IID = Intrinsic::dx_resource_load_cbufferrow_4;
208+
RetTy = StructType::get(Ty, Ty, Ty, Ty);
209+
EltSize = 4;
210+
NumElts = 4;
211+
break;
212+
// 64-bit elements: two per row.
case 64:
213+
IID = Intrinsic::dx_resource_load_cbufferrow_2;
214+
RetTy = StructType::get(Ty, Ty);
215+
EltSize = 8;
216+
NumElts = 2;
217+
break;
218+
default:
219+
llvm_unreachable("Only 16, 32, and 64 bit types supported");
220+
}
221+
}
222+
};
223+
} // namespace
224+
225+
/// Replaces a load through a cbuffer `getpointer` with `load.cbufferrow`
/// intrinsic calls plus the extracts/inserts needed to rebuild the loaded
/// value.
///
/// \p II is the getpointer call: operand 0 is used as the resource handle and
/// operand 1 must be a constant, which is treated as a byte offset into the
/// cbuffer. \p LI is the load being replaced; its uses are redirected to the
/// rebuilt value, but the load itself is not erased here. \p Offset is an
/// optional additional offset (may be null): a constant is folded into the
/// byte offset, while a non-constant value is added to the row index. \p RTI
/// is not used by this function.
static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
226+
dxil::ResourceTypeInfo &RTI) {
227+
const DataLayout &DL = LI->getDataLayout();
228+
229+
Type *Ty = LI->getType();
230+
assert(!isa<StructType>(Ty) && "Structs not handled yet");
231+
// Choose the row intrinsic from the element type (Ty itself when scalar).
CBufferRowIntrin Intrin(DL, Ty->getScalarType());
232+
233+
StringRef Name = LI->getName();
234+
Value *Handle = II->getOperand(0);
235+
236+
// New instructions are inserted immediately before the original load.
IRBuilder<> Builder(LI);
237+
238+
ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
239+
assert(GlobalOffset && "CBuffer getpointer index must be constant");
240+
241+
unsigned int FixedOffset = GlobalOffset->getZExtValue();
242+
// If we have a further constant offset we can just fold it in to the fixed
243+
// offset.
244+
if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
245+
FixedOffset += ConstOffset->getZExtValue();
246+
// From here on a non-null Offset always means a dynamic offset.
Offset = nullptr;
247+
}
248+
249+
// Split the fixed byte offset into a row number and an element index within
// that row.
Value *CurrentRow = ConstantInt::get(
250+
Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
251+
unsigned int CurrentIndex =
252+
(FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
253+
254+
assert(!(CurrentIndex && Offset) &&
255+
"Dynamic indexing into elements of cbuffer rows is not supported");
256+
// At this point if we have a non-constant offset it has to be an array
257+
// offset, so we can assume that it's a multiple of the row size.
258+
if (Offset)
259+
// Skip the add when the fixed part is zero and use the dynamic offset as-is.
CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
260+
261+
auto *CBufLoad = Builder.CreateIntrinsic(
262+
Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
263+
// The post-increment leaves CurrentIndex pointing at the next element to
// extract.
auto *Elt =
264+
Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");
265+
266+
// At this point we've loaded the first scalar of our result, but our original
267+
// type may have been a vector.
268+
// Remaining = (byte size of Ty / element size) - 1, i.e. the scalars still to
// be extracted after the first one.
unsigned int Remaining =
269+
((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
270+
if (Remaining == 0) {
271+
// We only have a single element, so we're done.
272+
Value *Result = Elt;
273+
274+
// However, if we loaded a <1 x T>, then we need to adjust the type.
275+
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
276+
assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
277+
Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
278+
Builder.getInt32(0), Name);
279+
}
280+
LI->replaceAllUsesWith(Result);
281+
return;
282+
}
283+
284+
// Walk each element and extract it, wrapping to new rows as needed.
285+
SmallVector<Value *> Extracts{Elt};
286+
while (Remaining--) {
287+
CurrentIndex %= Intrin.NumElts;
288+
289+
// An index of 0 here means the previous row is used up: advance the row by
// one and issue a fresh load for it.
if (CurrentIndex == 0) {
290+
CurrentRow = Builder.CreateAdd(CurrentRow,
291+
ConstantInt::get(Builder.getInt32Ty(), 1));
292+
CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
293+
{Handle, CurrentRow}, nullptr,
294+
Name + ".load");
295+
}
296+
297+
Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
298+
Name + ".extract"));
299+
}
300+
301+
// Finally, we build up the original loaded value.
302+
Value *Result = PoisonValue::get(Ty);
303+
// Insert the extracted scalars in order, starting from a poison value of the
// loaded type.
for (int I = 0, E = Extracts.size(); I < E; ++I)
304+
Result = Builder.CreateInsertElement(
305+
Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
306+
LI->replaceAllUsesWith(Result);
307+
}
308+
174309
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
175310
dxil::ResourceTypeInfo &RTI) {
176311
switch (RTI.getResourceKind()) {
@@ -179,6 +314,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
179314
case dxil::ResourceKind::RawBuffer:
180315
case dxil::ResourceKind::StructuredBuffer:
181316
return createRawLoad(II, LI, Offset);
317+
case dxil::ResourceKind::CBuffer:
318+
return createCBufferLoad(II, LI, Offset, RTI);
182319
case dxil::ResourceKind::Texture1D:
183320
case dxil::ResourceKind::Texture2D:
184321
case dxil::ResourceKind::Texture2DMS:
@@ -190,9 +327,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
190327
case dxil::ResourceKind::TextureCubeArray:
191328
case dxil::ResourceKind::FeedbackTexture2D:
192329
case dxil::ResourceKind::FeedbackTexture2DArray:
193-
case dxil::ResourceKind::CBuffer:
194330
case dxil::ResourceKind::TBuffer:
195-
// TODO: handle these
331+
reportFatalUsageError("Load not yet implemented for resource type");
196332
return;
197333
case dxil::ResourceKind::Sampler:
198334
case dxil::ResourceKind::RTAccelerationStructure:
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
2+
;
3+
; Tests for indexed types in dynamically indexed arrays in cbuffers.
4+
;
5+
; struct S {
6+
; float x[2];
7+
; uint q;
8+
; };
9+
; cbuffer CB : register(b0) {
10+
; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3
11+
; S v[3]; // offset 48, size 24 (+8) * 3
12+
; }
13+
%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }>
14+
%__cblayout_CB = type <{
15+
<{
16+
[2 x <{ <3 x i32>, target("dx.Padding", 4) }>],
17+
<3 x i32>
18+
}>,
19+
target("dx.Padding", 4),
20+
<{
21+
[2 x <{ %S, target("dx.Padding", 8) }>], %S
22+
}>
23+
}>
24+
25+
@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
26+
27+
; CHECK: define void @f
28+
define void @f(ptr %dst, i32 %idx) {
29+
entry:
30+
%CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
31+
store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
32+
33+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
34+
%CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
35+
36+
;; w[2].z
37+
;
38+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
39+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
40+
; CHECK: store i32 [[X]], ptr %dst
41+
%w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
42+
%w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40
43+
%w_load = load i32, ptr addrspace(2) %w_gep, align 4
44+
store i32 %w_load, ptr %dst, align 4
45+
46+
;; v[2].q
47+
;
48+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
49+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
50+
; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
51+
; CHECK: store i32 [[X]], ptr [[PTR]]
52+
%v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
53+
%v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84
54+
%v_load = load i32, ptr addrspace(2) %v_gep, align 4
55+
%v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
56+
store i32 %v_load, ptr %v.i, align 4
57+
58+
ret void
59+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
2+
;
3+
; Test for when we have indices into both the array and the vector, i.e., s[1][3]
4+
5+
; cbuffer CB : register(b0) {
6+
; uint4 s[3]; // offset 0, size 16 * 3
7+
; }
8+
%__cblayout_CB = type <{ [2 x <4 x i32>] }>
9+
10+
@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
11+
12+
; CHECK: define void @f
13+
define void @f(ptr %dst) {
14+
entry:
15+
%CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
16+
store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
17+
18+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
19+
%CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
20+
21+
;; s[1][3]
22+
;
23+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
24+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
25+
; CHECK: store i32 [[X]], ptr %dst
26+
%i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
27+
%i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28
28+
%i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4
29+
store i32 %i8_vecext, ptr %dst, align 4
30+
31+
;; s[2].w
32+
;
33+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
34+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
35+
;;
36+
;; It would be nice to avoid the redundant vector creation here, but that's
37+
;; outside of the scope of this pass.
38+
;;
39+
; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3
40+
; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3
41+
; CHECK: store i32 [[X_EXT]], ptr %dst
42+
%typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
43+
%typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 2
44+
%typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16
45+
%typed_vecext = extractelement <4 x i32> %typed_load, i32 3
46+
store i32 %typed_vecext, ptr %dst, align 4
47+
48+
ret void
49+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
2+
3+
; cbuffer CB : register(b0) {
4+
; float a1[3];
5+
; }
6+
%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
7+
8+
@CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison
9+
10+
; CHECK: define void @f
11+
define void @f(ptr %dst) {
12+
entry:
13+
%CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
14+
store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
15+
16+
;; a1[1]
17+
;; Note that the valid GEPs of a1 are `0, 0, 0`, `0, 0, 1`, and `0, 1`.
18+
;
19+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
20+
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
21+
; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
22+
; CHECK: store float [[X]], ptr %dst
23+
%CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8
24+
%a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
25+
%a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1
26+
%a1 = load float, ptr addrspace(2) %a1_gep, align 4
27+
store float %a1, ptr %dst, align 32
28+
29+
ret void
30+
}

0 commit comments

Comments
 (0)