Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 9 additions & 35 deletions llvm/docs/DirectX/DXILResources.rst
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ instead. That is, ``llvm.dx.resource.load.typedbuffer`` from a
of 4 floats, and from ``Buffer<double2>`` a vector of two doubles, etc. The
operations are then expanded out to match DXIL's format during lowering.

In cases where we need ``CheckAccessFullyMapped``, we have a second intrinsic
that returns an anonymous struct with element-0 being the contained type, and
In order to support ``CheckAccessFullyMapped``, we need these intrinsics to
return an anonymous struct with element-0 being the contained type, and
element-1 being the ``i1`` result of a ``CheckAccessFullyMapped`` call. We
don't have a separate call to ``CheckAccessFullyMapped`` at all, since that's
the only operation that can possibly be done on this value. In practice this
Expand All @@ -317,8 +317,8 @@ HLSL source, but this actually matches DXC's behaviour in practice.
- Description
* - Return value
-
- The contained type of the buffer
- The data loaded from the buffer
- A structure of the contained type and the check bit
- The data loaded from the buffer and the check bit
* - ``%buffer``
- 0
- ``target(dx.TypedBuffer, ...)``
Expand All @@ -332,48 +332,22 @@ Examples:

.. code-block:: llvm

%ret = call <4 x float>
%ret = call {<4 x float>, i1}
@llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_0_0_0t(
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index)
%ret = call float
%ret = call {float, i1}
@llvm.dx.resource.load.typedbuffer.f32.tdx.TypedBuffer_f32_0_0_0t(
target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 %index)
%ret = call <4 x i32>
%ret = call {<4 x i32>, i1}
@llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v4i32_0_0_0t(
target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 %index)
%ret = call <4 x half>
%ret = call {<4 x half>, i1}
@llvm.dx.resource.load.typedbuffer.v4f16.tdx.TypedBuffer_v4f16_0_0_0t(
target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 %index)
%ret = call <2 x double>
%ret = call {<2 x double>, i1}
@llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_0_0t(
target("dx.TypedBuffer", <2 x double>, 0, 0, 0) %buffer, i32 %index)

.. list-table:: ``@llvm.dx.resource.loadchecked.typedbuffer``
:header-rows: 1

* - Argument
-
- Type
- Description
* - Return value
-
- A structure of the contained type and the check bit
- The data loaded from the buffer and the check bit
* - ``%buffer``
- 0
- ``target(dx.TypedBuffer, ...)``
- The buffer to load from
* - ``%index``
- 1
- ``i32``
- Index into the buffer

.. code-block:: llvm

%ret = call {<4 x float>, i1}
@llvm.dx.resource.loadchecked.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_0_0_0t(
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index)

Texture and Typed Buffer Stores
-------------------------------

Expand Down
7 changes: 2 additions & 5 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ def int_dx_resource_getpointer
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_dx_resource_load_typedbuffer
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty],
[IntrReadMem]>;
def int_dx_resource_loadchecked_typedbuffer
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
[llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
def int_dx_resource_store_typedbuffer
Expand All @@ -43,7 +40,7 @@ def int_dx_resource_store_typedbuffer
def int_dx_resource_updatecounter
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
[IntrInaccessibleMemOrArgMemOnly]>;

// Cast between target extension handle types and dxil-style opaque handles
def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;

Expand Down Expand Up @@ -105,7 +102,7 @@ def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrCon
def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
Expand Down
15 changes: 10 additions & 5 deletions llvm/lib/Target/DirectX/DXILOpLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,16 @@ class OpLowerer {
}
}

OldResult = cast<Instruction>(
IRB.CreateExtractValue(Op, 0, OldResult->getName()));
if (OldResult->use_empty()) {
// Only the check bit was used, so we're done here.
OldResult->eraseFromParent();
return Error::success();
}

assert(OldResult->hasOneUse() &&
isa<ExtractValueInst>(*OldResult->user_begin()) &&
"Expected only use to be extract of first element");
OldResult = cast<Instruction>(*OldResult->user_begin());
OldTy = ST->getElementType(0);
}

Expand Down Expand Up @@ -723,9 +731,6 @@ class OpLowerer {
HasErrors |= lowerGetPointer(F);
break;
case Intrinsic::dx_resource_load_typedbuffer:
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/false);
break;
case Intrinsic::dx_resource_loadchecked_typedbuffer:
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
break;
case Intrinsic::dx_resource_store_typedbuffer:
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/DirectX/DXILResourceAccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
"Unexpected typed buffer type");
Type *ContainedType = HandleType->getTypeParameter(0);

Type *LoadType =
StructType::get(ContainedType, Type::getInt1Ty(II->getContext()));

// We need the size of an element in bytes so that we can calculate the offset
// in elements given a total offset in bytes later.
Type *ScalarType = ContainedType->getScalarType();
Expand Down Expand Up @@ -81,13 +84,15 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
// We're storing a scalar, so we need to load the current value and only
// replace the relevant part.
auto *Load = Builder.CreateIntrinsic(
ContainedType, Intrinsic::dx_resource_load_typedbuffer,
LoadType, Intrinsic::dx_resource_load_typedbuffer,
{II->getOperand(0), II->getOperand(1)});
auto *Struct = Builder.CreateExtractValue(Load, {0});

// If we have an offset from seeing a GEP earlier, use it.
Value *IndexOp = Current.Index
? Current.Index
: ConstantInt::get(Builder.getInt32Ty(), 0);
V = Builder.CreateInsertElement(Load, V, IndexOp);
V = Builder.CreateInsertElement(Struct, V, IndexOp);
} else {
llvm_unreachable("Store to typed resource has invalid type");
}
Expand All @@ -101,8 +106,10 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
} else if (auto *LI = dyn_cast<LoadInst>(Current.Access)) {
IRBuilder<> Builder(LI);
Value *V = Builder.CreateIntrinsic(
ContainedType, Intrinsic::dx_resource_load_typedbuffer,
LoadType, Intrinsic::dx_resource_load_typedbuffer,
{II->getOperand(0), II->getOperand(1)});
V = Builder.CreateExtractValue(V, {0});

if (Current.Index)
V = Builder.CreateExtractElement(V, Current.Index);

Expand Down
29 changes: 17 additions & 12 deletions llvm/test/CodeGen/DirectX/BufferLoad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ define void @loadv4f32() {
; CHECK-NOT: %dx.resource.casthandle

; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
%data0 = call <4 x float> @llvm.dx.resource.load.typedbuffer(
%load0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
%data0 = extractvalue {<4 x float>, i1} %load0, 0

; The extract order depends on the users, so don't enforce that here.
; CHECK-DAG: [[VAL0_0:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 0
Expand All @@ -34,8 +35,9 @@ define void @loadv4f32() {
call void @scalar_user(float %data0_2)

; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef)
%data4 = call <4 x float> @llvm.dx.resource.load.typedbuffer(
%load4 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4)
%data4 = extractvalue {<4 x float>, i1} %load4, 0

; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 0
; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 1
Expand All @@ -48,8 +50,9 @@ define void @loadv4f32() {
call void @vector_user(<4 x float> %data4)

; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef)
%data12 = call <4 x float> @llvm.dx.resource.load.typedbuffer(
%load12 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 12)
%data12 = extractvalue {<4 x float>, i1} %load12, 0

; CHECK: [[DATA12_3:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA12]], 3
%data12_3 = extractelement <4 x float> %data12, i32 3
Expand All @@ -70,8 +73,9 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) {
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[LOAD:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 %bufindex, i32 undef)
%load = call <4 x float> @llvm.dx.resource.load.typedbuffer(
%load = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %bufindex)
%data = extractvalue {<4 x float>, i1} %load, 0

; CHECK: [[ALLOCA:%.*]] = alloca [4 x float]
; CHECK: [[V0:%.*]] = extractvalue %dx.types.ResRet.f32 [[LOAD]], 0
Expand All @@ -89,10 +93,10 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) {
;
; CHECK: [[PTR:%.*]] = getelementptr inbounds [4 x float], ptr [[ALLOCA]], i32 0, i32 %elemindex
; CHECK: [[X:%.*]] = load float, ptr [[PTR]]
%data = extractelement <4 x float> %load, i32 %elemindex
%x = extractelement <4 x float> %data, i32 %elemindex

; CHECK: call void @scalar_user(float [[X]])
call void @scalar_user(float %data)
call void @scalar_user(float %x)

ret void
}
Expand All @@ -105,8 +109,9 @@ define void @loadf32() {
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
%data0 = call float @llvm.dx.resource.load.typedbuffer(
%load0 = call {float, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 0)
%data0 = extractvalue {float, i1} %load0, 0

; CHECK: [[VAL0:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 0
; CHECK: call void @scalar_user(float [[VAL0]])
Expand All @@ -123,7 +128,7 @@ define void @loadv2f32() {
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
%data0 = call <2 x float> @llvm.dx.resource.load.typedbuffer(
%data0 = call {<2 x float>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <2 x float>, 0, 0, 0) %buffer, i32 0)

ret void
Expand All @@ -137,7 +142,7 @@ define void @loadv4f32_checkbit() {
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
%data0 = call {<4 x float>, i1} @llvm.dx.resource.loadchecked.typedbuffer.f32(
%data0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.f32(
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)

; CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 4
Expand All @@ -158,7 +163,7 @@ define void @loadv4i32() {
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
%data0 = call <4 x i32> @llvm.dx.resource.load.typedbuffer(
%data0 = call {<4 x i32>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 0)

ret void
Expand All @@ -172,7 +177,7 @@ define void @loadv4f16() {
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
%data0 = call <4 x half> @llvm.dx.resource.load.typedbuffer(
%data0 = call {<4 x half>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 0)

ret void
Expand All @@ -186,7 +191,7 @@ define void @loadv4i16() {
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i16 @dx.op.bufferLoad.i16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
%data0 = call <4 x i16> @llvm.dx.resource.load.typedbuffer(
%data0 = call {<4 x i16>, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <4 x i16>, 0, 0, 0) %buffer, i32 0)

ret void
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@ define void @load_float4(i32 %index, i32 %elemindex) {
%ptr = call ptr @llvm.dx.resource.getpointer(
target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)

; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
; CHECK: %[[VALUE:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
%vec_data = load <4 x float>, ptr %ptr
call void @use_float4(<4 x float> %vec_data)

; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
; CHECK: %[[VALUE:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0
; CHECK: extractelement <4 x float> %[[VALUE]], i32 1
%y_ptr = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 1
%y_data = load float, ptr %y_ptr
call void @use_float(float %y_data)

; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
; CHECK: %[[VALUE:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0
; CHECK: extractelement <4 x float> %[[VALUE]], i32 %elemindex
%dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex
%dyndata = load float, ptr %dynamic
Expand Down
Loading
Loading