Skip to content

Commit 2c7c07d

Browse files
authored
[DirectX] Remove the "checked" variants of dx.resource.load (#120778)
We'd introduced separate versions of `llvm.dx.resource.load` with a struct return to handle the CheckAccessFullyMapped case without making the IR for the common case unnecessarily complicated. However, at this point the common case is really `resource.getpointer`, so the ergonomic benefit of a simplified version of `load` no longer outweighs the cost of maintaining multiple opcodes. Drop the `dx.resource.loadchecked` functions and have `dx.resource.load` consistently return `{element_type, i1}`.
1 parent 71ddde8 commit 2c7c07d

File tree

9 files changed

+84
-83
lines changed

9 files changed

+84
-83
lines changed

llvm/docs/DirectX/DXILResources.rst

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,8 @@ instead. That is, ``llvm.dx.resource.load.typedbuffer`` from a
296296
of 4 floats, and from ``Buffer<double2>`` a vector of two doubles, etc. The
297297
operations are then expanded out to match DXIL's format during lowering.
298298

299-
In cases where we need ``CheckAccessFullyMapped``, we have a second intrinsic
300-
that returns an anonymous struct with element-0 being the contained type, and
299+
In order to support ``CheckAccessFullyMapped``, we need these intrinsics to
300+
return an anonymous struct with element-0 being the contained type, and
301301
element-1 being the ``i1`` result of a ``CheckAccessFullyMapped`` call. We
302302
don't have a separate call to ``CheckAccessFullyMapped`` at all, since that's
303303
the only operation that can possibly be done on this value. In practice this
@@ -317,8 +317,8 @@ HLSL source, but this actually matches DXC's behaviour in practice.
317317
- Description
318318
* - Return value
319319
-
320-
- The contained type of the buffer
321-
- The data loaded from the buffer
320+
- A structure of the contained type and the check bit
321+
- The data loaded from the buffer and the check bit
322322
* - ``%buffer``
323323
- 0
324324
- ``target(dx.TypedBuffer, ...)``
@@ -332,48 +332,22 @@ Examples:
332332

333333
.. code-block:: llvm
334334
335-
%ret = call <4 x float>
335+
%ret = call {<4 x float>, i1}
336336
@llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_0_0_0t(
337337
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index)
338-
%ret = call float
338+
%ret = call {float, i1}
339339
@llvm.dx.resource.load.typedbuffer.f32.tdx.TypedBuffer_f32_0_0_0t(
340340
target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 %index)
341-
%ret = call <4 x i32>
341+
%ret = call {<4 x i32>, i1}
342342
@llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v4i32_0_0_0t(
343343
target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 %index)
344-
%ret = call <4 x half>
344+
%ret = call {<4 x half>, i1}
345345
@llvm.dx.resource.load.typedbuffer.v4f16.tdx.TypedBuffer_v4f16_0_0_0t(
346346
target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 %index)
347-
%ret = call <2 x double>
347+
%ret = call {<2 x double>, i1}
348348
@llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_0_0_0t(
349349
target("dx.TypedBuffer", <2 x double>, 0, 0, 0) %buffer, i32 %index)
350350
351-
.. list-table:: ``@llvm.dx.resource.loadchecked.typedbuffer``
352-
:header-rows: 1
353-
354-
* - Argument
355-
-
356-
- Type
357-
- Description
358-
* - Return value
359-
-
360-
- A structure of the contained type and the check bit
361-
- The data loaded from the buffer and the check bit
362-
* - ``%buffer``
363-
- 0
364-
- ``target(dx.TypedBuffer, ...)``
365-
- The buffer to load from
366-
* - ``%index``
367-
- 1
368-
- ``i32``
369-
- Index into the buffer
370-
371-
.. code-block:: llvm
372-
373-
%ret = call {<4 x float>, i1}
374-
@llvm.dx.resource.loadchecked.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_0_0_0t(
375-
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index)
376-
377351
Texture and Typed Buffer Stores
378352
-------------------------------
379353

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ def int_dx_resource_getpointer
3131
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
3232
[IntrNoMem]>;
3333
def int_dx_resource_load_typedbuffer
34-
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty],
35-
[IntrReadMem]>;
36-
def int_dx_resource_loadchecked_typedbuffer
3734
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
3835
[llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
3936
def int_dx_resource_store_typedbuffer
@@ -43,7 +40,7 @@ def int_dx_resource_store_typedbuffer
4340
def int_dx_resource_updatecounter
4441
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
4542
[IntrInaccessibleMemOrArgMemOnly]>;
46-
43+
4744
// Cast between target extension handle types and dxil-style opaque handles
4845
def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
4946

@@ -105,7 +102,7 @@ def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrCon
105102
def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
106103
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
107104
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
108-
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
105+
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
109106
[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
110107
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
111108
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;

llvm/lib/Target/DirectX/DXILOpLowering.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -415,8 +415,16 @@ class OpLowerer {
415415
}
416416
}
417417

418-
OldResult = cast<Instruction>(
419-
IRB.CreateExtractValue(Op, 0, OldResult->getName()));
418+
if (OldResult->use_empty()) {
419+
// Only the check bit was used, so we're done here.
420+
OldResult->eraseFromParent();
421+
return Error::success();
422+
}
423+
424+
assert(OldResult->hasOneUse() &&
425+
isa<ExtractValueInst>(*OldResult->user_begin()) &&
426+
"Expected only use to be extract of first element");
427+
OldResult = cast<Instruction>(*OldResult->user_begin());
420428
OldTy = ST->getElementType(0);
421429
}
422430

@@ -723,9 +731,6 @@ class OpLowerer {
723731
HasErrors |= lowerGetPointer(F);
724732
break;
725733
case Intrinsic::dx_resource_load_typedbuffer:
726-
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/false);
727-
break;
728-
case Intrinsic::dx_resource_loadchecked_typedbuffer:
729734
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
730735
break;
731736
case Intrinsic::dx_resource_store_typedbuffer:

llvm/lib/Target/DirectX/DXILResourceAccess.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
3030
"Unexpected typed buffer type");
3131
Type *ContainedType = HandleType->getTypeParameter(0);
3232

33+
Type *LoadType =
34+
StructType::get(ContainedType, Type::getInt1Ty(II->getContext()));
35+
3336
// We need the size of an element in bytes so that we can calculate the offset
3437
// in elements given a total offset in bytes later.
3538
Type *ScalarType = ContainedType->getScalarType();
@@ -81,13 +84,15 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
8184
// We're storing a scalar, so we need to load the current value and only
8285
// replace the relevant part.
8386
auto *Load = Builder.CreateIntrinsic(
84-
ContainedType, Intrinsic::dx_resource_load_typedbuffer,
87+
LoadType, Intrinsic::dx_resource_load_typedbuffer,
8588
{II->getOperand(0), II->getOperand(1)});
89+
auto *Struct = Builder.CreateExtractValue(Load, {0});
90+
8691
// If we have an offset from seeing a GEP earlier, use it.
8792
Value *IndexOp = Current.Index
8893
? Current.Index
8994
: ConstantInt::get(Builder.getInt32Ty(), 0);
90-
V = Builder.CreateInsertElement(Load, V, IndexOp);
95+
V = Builder.CreateInsertElement(Struct, V, IndexOp);
9196
} else {
9297
llvm_unreachable("Store to typed resource has invalid type");
9398
}
@@ -101,8 +106,10 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
101106
} else if (auto *LI = dyn_cast<LoadInst>(Current.Access)) {
102107
IRBuilder<> Builder(LI);
103108
Value *V = Builder.CreateIntrinsic(
104-
ContainedType, Intrinsic::dx_resource_load_typedbuffer,
109+
LoadType, Intrinsic::dx_resource_load_typedbuffer,
105110
{II->getOperand(0), II->getOperand(1)});
111+
V = Builder.CreateExtractValue(V, {0});
112+
106113
if (Current.Index)
107114
V = Builder.CreateExtractElement(V, Current.Index);
108115

llvm/test/CodeGen/DirectX/BufferLoad.ll

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ define void @loadv4f32() {
1717
; CHECK-NOT: %dx.resource.casthandle
1818

1919
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
20-
%data0 = call <4 x float> @llvm.dx.resource.load.typedbuffer(
20+
%load0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
2121
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
22+
%data0 = extractvalue {<4 x float>, i1} %load0, 0
2223

2324
; The extract order depends on the users, so don't enforce that here.
2425
; CHECK-DAG: [[VAL0_0:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 0
@@ -34,8 +35,9 @@ define void @loadv4f32() {
3435
call void @scalar_user(float %data0_2)
3536

3637
; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef)
37-
%data4 = call <4 x float> @llvm.dx.resource.load.typedbuffer(
38+
%load4 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
3839
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4)
40+
%data4 = extractvalue {<4 x float>, i1} %load4, 0
3941

4042
; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 0
4143
; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 1
@@ -48,8 +50,9 @@ define void @loadv4f32() {
4850
call void @vector_user(<4 x float> %data4)
4951

5052
; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef)
51-
%data12 = call <4 x float> @llvm.dx.resource.load.typedbuffer(
53+
%load12 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
5254
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 12)
55+
%data12 = extractvalue {<4 x float>, i1} %load12, 0
5356

5457
; CHECK: [[DATA12_3:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA12]], 3
5558
%data12_3 = extractelement <4 x float> %data12, i32 3
@@ -70,8 +73,9 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) {
7073
i32 0, i32 0, i32 1, i32 0, i1 false)
7174

7275
; CHECK: [[LOAD:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 %bufindex, i32 undef)
73-
%load = call <4 x float> @llvm.dx.resource.load.typedbuffer(
76+
%load = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
7477
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %bufindex)
78+
%data = extractvalue {<4 x float>, i1} %load, 0
7579

7680
; CHECK: [[ALLOCA:%.*]] = alloca [4 x float]
7781
; CHECK: [[V0:%.*]] = extractvalue %dx.types.ResRet.f32 [[LOAD]], 0
@@ -89,10 +93,10 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) {
8993
;
9094
; CHECK: [[PTR:%.*]] = getelementptr inbounds [4 x float], ptr [[ALLOCA]], i32 0, i32 %elemindex
9195
; CHECK: [[X:%.*]] = load float, ptr [[PTR]]
92-
%data = extractelement <4 x float> %load, i32 %elemindex
96+
%x = extractelement <4 x float> %data, i32 %elemindex
9397

9498
; CHECK: call void @scalar_user(float [[X]])
95-
call void @scalar_user(float %data)
99+
call void @scalar_user(float %x)
96100

97101
ret void
98102
}
@@ -105,8 +109,9 @@ define void @loadf32() {
105109
i32 0, i32 0, i32 1, i32 0, i1 false)
106110

107111
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
108-
%data0 = call float @llvm.dx.resource.load.typedbuffer(
112+
%load0 = call {float, i1} @llvm.dx.resource.load.typedbuffer(
109113
target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 0)
114+
%data0 = extractvalue {float, i1} %load0, 0
110115

111116
; CHECK: [[VAL0:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 0
112117
; CHECK: call void @scalar_user(float [[VAL0]])
@@ -123,7 +128,7 @@ define void @loadv2f32() {
123128
i32 0, i32 0, i32 1, i32 0, i1 false)
124129

125130
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
126-
%data0 = call <2 x float> @llvm.dx.resource.load.typedbuffer(
131+
%data0 = call {<2 x float>, i1} @llvm.dx.resource.load.typedbuffer(
127132
target("dx.TypedBuffer", <2 x float>, 0, 0, 0) %buffer, i32 0)
128133

129134
ret void
@@ -137,7 +142,7 @@ define void @loadv4f32_checkbit() {
137142
i32 0, i32 0, i32 1, i32 0, i1 false)
138143

139144
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
140-
%data0 = call {<4 x float>, i1} @llvm.dx.resource.loadchecked.typedbuffer.f32(
145+
%data0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.f32(
141146
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
142147

143148
; CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 4
@@ -158,7 +163,7 @@ define void @loadv4i32() {
158163
i32 0, i32 0, i32 1, i32 0, i1 false)
159164

160165
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
161-
%data0 = call <4 x i32> @llvm.dx.resource.load.typedbuffer(
166+
%data0 = call {<4 x i32>, i1} @llvm.dx.resource.load.typedbuffer(
162167
target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 0)
163168

164169
ret void
@@ -172,7 +177,7 @@ define void @loadv4f16() {
172177
i32 0, i32 0, i32 1, i32 0, i1 false)
173178

174179
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
175-
%data0 = call <4 x half> @llvm.dx.resource.load.typedbuffer(
180+
%data0 = call {<4 x half>, i1} @llvm.dx.resource.load.typedbuffer(
176181
target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 0)
177182

178183
ret void
@@ -186,7 +191,7 @@ define void @loadv4i16() {
186191
i32 0, i32 0, i32 1, i32 0, i1 false)
187192

188193
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i16 @dx.op.bufferLoad.i16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
189-
%data0 = call <4 x i16> @llvm.dx.resource.load.typedbuffer(
194+
%data0 = call {<4 x i16>, i1} @llvm.dx.resource.load.typedbuffer(
190195
target("dx.TypedBuffer", <4 x i16>, 0, 0, 0) %buffer, i32 0)
191196

192197
ret void

llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,19 @@ define void @load_float4(i32 %index, i32 %elemindex) {
1515
%ptr = call ptr @llvm.dx.resource.getpointer(
1616
target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
1717

18-
; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
18+
; CHECK: %[[VALUE:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
1919
%vec_data = load <4 x float>, ptr %ptr
2020
call void @use_float4(<4 x float> %vec_data)
2121

22-
; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
22+
; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
23+
; CHECK: %[[VALUE:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0
2324
; CHECK: extractelement <4 x float> %[[VALUE]], i32 1
2425
%y_ptr = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 1
2526
%y_data = load float, ptr %y_ptr
2627
call void @use_float(float %y_data)
2728

28-
; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
29+
; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
30+
; CHECK: %[[VALUE:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0
2931
; CHECK: extractelement <4 x float> %[[VALUE]], i32 %elemindex
3032
%dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex
3133
%dyndata = load float, ptr %dynamic

0 commit comments

Comments
 (0)