From 74a10a9b78d2ebd9115100a40c73a1d207f23ac1 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Thu, 19 Dec 2024 12:06:57 -0700 Subject: [PATCH] [DirectX] Remove the "checked" variants of `dx.resource.load` We'd introduced separate versions of `llvm.dx.resource.load` with a struct return to handle the CheckAccessFullyMapped case without making the IR for the common case unnecessarily complicated. However, at this point the common case is really `resource.getpointer`, so the ergonomics of a simplified version of `load` don't actually gain us as much as the cost of having multiple opcodes. Drop the `dx.resource.loadchecked` functions and have `dx.resource.load` consistently return `{element_type, i1}`. --- llvm/docs/DirectX/DXILResources.rst | 44 ++++--------------- llvm/include/llvm/IR/IntrinsicsDirectX.td | 7 +-- llvm/lib/Target/DirectX/DXILOpLowering.cpp | 15 ++++--- .../lib/Target/DirectX/DXILResourceAccess.cpp | 13 ++++-- llvm/test/CodeGen/DirectX/BufferLoad.ll | 29 +++++++----- .../ResourceAccess/load_typedbuffer.ll | 8 ++-- .../ResourceAccess/store_typedbuffer.ll | 35 +++++++++------ .../DirectX/ResourceGlobalElimination.ll | 10 +++-- .../typed-uav-load-additional-formats.ll | 6 ++- 9 files changed, 84 insertions(+), 83 deletions(-) diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst index 3971d3788b8a0..406d68e3c9fad 100644 --- a/llvm/docs/DirectX/DXILResources.rst +++ b/llvm/docs/DirectX/DXILResources.rst @@ -296,8 +296,8 @@ instead. That is, ``llvm.dx.resource.load.typedbuffer`` from a of 4 floats, and from ``Buffer`` a vector of two doubles, etc. The operations are then expanded out to match DXIL's format during lowering. -In cases where we need ``CheckAccessFullyMapped``, we have a second intrinsic -that returns an anonymous struct with element-0 being the contained type, and +In order to support ``CheckAccessFullyMapped``, we need these intrinsics to +return an anonymous struct with element-0 being the contained type, and element-1 being the ``i1`` result of a ``CheckAccessFullyMapped`` call. We don't have a separate call to ``CheckAccessFullyMapped`` at all, since that's the only operation that can possibly be done on this value. In practice this @@ -317,8 +317,8 @@ HLSL source, but this actually matches DXC's behaviour in practice. - Description * - Return value - - - The contained type of the buffer - - The data loaded from the buffer + - A structure of the contained type and the check bit + - The data loaded from the buffer and the check bit * - ``%buffer`` - 0 - ``target(dx.TypedBuffer, ...)`` @@ -332,48 +332,22 @@ Examples: .. code-block:: llvm - %ret = call <4 x float> + %ret = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_0_0_0t( target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index) - %ret = call float + %ret = call {float, i1} @llvm.dx.resource.load.typedbuffer.f32.tdx.TypedBuffer_f32_0_0_0t( target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 %index) - %ret = call <4 x i32> + %ret = call {<4 x i32>, i1} @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v4i32_0_0_0t( target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 %index) - %ret = call <4 x half> + %ret = call {<4 x half>, i1} @llvm.dx.resource.load.typedbuffer.v4f16.tdx.TypedBuffer_v4f16_0_0_0t( target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 %index) - %ret = call <2 x double> + %ret = call {<2 x double>, i1} @llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_0_0t( target("dx.TypedBuffer", <2 x double>, 0, 0, 0) %buffer, i32 %index) -.. list-table:: ``@llvm.dx.resource.loadchecked.typedbuffer`` - :header-rows: 1 - - * - Argument - - - - Type - - Description - * - Return value - - - - A structure of the contained type and the check bit - - The data loaded from the buffer and the check bit - * - ``%buffer`` - - 0 - - ``target(dx.TypedBuffer, ...)`` - - The buffer to load from - * - ``%index`` - - 1 - - ``i32`` - - Index into the buffer - -.. code-block:: llvm - - %ret = call {<4 x float>, i1} - @llvm.dx.resource.loadchecked.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_0_0_0t( - target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index) - Texture and Typed Buffer Stores ------------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index d31d5afe5145a..dfdfda963b627 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -31,9 +31,6 @@ def int_dx_resource_getpointer : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty], [IntrNoMem]>; def int_dx_resource_load_typedbuffer - : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty], - [IntrReadMem]>; -def int_dx_resource_loadchecked_typedbuffer : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty], [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>; def int_dx_resource_store_typedbuffer @@ -43,7 +40,7 @@ def int_dx_resource_store_typedbuffer def int_dx_resource_updatecounter : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], [IntrInaccessibleMemOrArgMemOnly]>; - + // Cast between target extension handle types and dxil-style opaque handles def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; @@ -105,7 +102,7 @@ def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrCon def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>; def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; -def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], +def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 4e01dd1145a55..e0ee4d6d6b130 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -415,8 +415,16 @@ class OpLowerer { } } - OldResult = cast( - IRB.CreateExtractValue(Op, 0, OldResult->getName())); + if (OldResult->use_empty()) { + // Only the check bit was used, so we're done here. + OldResult->eraseFromParent(); + return Error::success(); + } + + assert(OldResult->hasOneUse() && + isa(*OldResult->user_begin()) && + "Expected only use to be extract of first element"); + OldResult = cast(*OldResult->user_begin()); OldTy = ST->getElementType(0); } @@ -723,9 +731,6 @@ class OpLowerer { HasErrors |= lowerGetPointer(F); break; case Intrinsic::dx_resource_load_typedbuffer: - HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/false); - break; - case Intrinsic::dx_resource_loadchecked_typedbuffer: HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true); break; case Intrinsic::dx_resource_store_typedbuffer: diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index 1ff8f09f066db..837624935c5fa 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -30,6 +30,9 @@ static void replaceTypedBufferAccess(IntrinsicInst *II, "Unexpected typed buffer type"); Type *ContainedType = HandleType->getTypeParameter(0); + Type *LoadType = + StructType::get(ContainedType, Type::getInt1Ty(II->getContext())); + // We need the size of an element in bytes so that we can calculate the offset // in elements given a total offset in bytes later. Type *ScalarType = ContainedType->getScalarType(); @@ -81,13 +84,15 @@ static void replaceTypedBufferAccess(IntrinsicInst *II, // We're storing a scalar, so we need to load the current value and only // replace the relevant part. auto *Load = Builder.CreateIntrinsic( - ContainedType, Intrinsic::dx_resource_load_typedbuffer, + LoadType, Intrinsic::dx_resource_load_typedbuffer, {II->getOperand(0), II->getOperand(1)}); + auto *Struct = Builder.CreateExtractValue(Load, {0}); + // If we have an offset from seeing a GEP earlier, use it. Value *IndexOp = Current.Index ? Current.Index : ConstantInt::get(Builder.getInt32Ty(), 0); - V = Builder.CreateInsertElement(Load, V, IndexOp); + V = Builder.CreateInsertElement(Struct, V, IndexOp); } else { llvm_unreachable("Store to typed resource has invalid type"); } @@ -101,8 +106,10 @@ static void replaceTypedBufferAccess(IntrinsicInst *II, } else if (auto *LI = dyn_cast(Current.Access)) { IRBuilder<> Builder(LI); Value *V = Builder.CreateIntrinsic( - ContainedType, Intrinsic::dx_resource_load_typedbuffer, + LoadType, Intrinsic::dx_resource_load_typedbuffer, {II->getOperand(0), II->getOperand(1)}); + V = Builder.CreateExtractValue(V, {0}); + if (Current.Index) V = Builder.CreateExtractElement(V, Current.Index); diff --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll index 7f1291bf4a5c8..86e2217a8e76f 100644 --- a/llvm/test/CodeGen/DirectX/BufferLoad.ll +++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll @@ -17,8 +17,9 @@ define void @loadv4f32() { ; CHECK-NOT: %dx.resource.casthandle ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) - %data0 = call <4 x float> @llvm.dx.resource.load.typedbuffer( + %load0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0) + %data0 = extractvalue {<4 x float>, i1} %load0, 0 ; The extract order depends on the users, so don't enforce that here. ; CHECK-DAG: [[VAL0_0:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 0 @@ -34,8 +35,9 @@ define void @loadv4f32() { call void @scalar_user(float %data0_2) ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef) - %data4 = call <4 x float> @llvm.dx.resource.load.typedbuffer( + %load4 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4) + %data4 = extractvalue {<4 x float>, i1} %load4, 0 ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 0 ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 1 @@ -48,8 +50,9 @@ define void @loadv4f32() { call void @vector_user(<4 x float> %data4) ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef) - %data12 = call <4 x float> @llvm.dx.resource.load.typedbuffer( + %load12 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 12) + %data12 = extractvalue {<4 x float>, i1} %load12, 0 ; CHECK: [[DATA12_3:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA12]], 3 %data12_3 = extractelement <4 x float> %data12, i32 3 @@ -70,8 +73,9 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK: [[LOAD:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 %bufindex, i32 undef) - %load = call <4 x float> @llvm.dx.resource.load.typedbuffer( + %load = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %bufindex) + %data = extractvalue {<4 x float>, i1} %load, 0 ; CHECK: [[ALLOCA:%.*]] = alloca [4 x float] ; CHECK: [[V0:%.*]] = extractvalue %dx.types.ResRet.f32 [[LOAD]], 0 @@ -89,10 +93,10 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) { ; ; CHECK: [[PTR:%.*]] = getelementptr inbounds [4 x float], ptr [[ALLOCA]], i32 0, i32 %elemindex ; CHECK: [[X:%.*]] = load float, ptr [[PTR]] - %data = extractelement <4 x float> %load, i32 %elemindex + %x = extractelement <4 x float> %data, i32 %elemindex ; CHECK: call void @scalar_user(float [[X]]) - call void @scalar_user(float %data) + call void @scalar_user(float %x) ret void } @@ -105,8 +109,9 @@ define void @loadf32() { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) - %data0 = call float @llvm.dx.resource.load.typedbuffer( + %load0 = call {float, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 0) + %data0 = extractvalue {float, i1} %load0, 0 ; CHECK: [[VAL0:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 0 ; CHECK: call void @scalar_user(float [[VAL0]]) @@ -123,7 +128,7 @@ define void @loadv2f32() { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) - %data0 = call <2 x float> @llvm.dx.resource.load.typedbuffer( + %data0 = call {<2 x float>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <2 x float>, 0, 0, 0) %buffer, i32 0) ret void @@ -137,7 +142,7 @@ define void @loadv4f32_checkbit() { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) - %data0 = call {<4 x float>, i1} @llvm.dx.resource.loadchecked.typedbuffer.f32( + %data0 = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.f32( target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0) ; CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 4 @@ -158,7 +163,7 @@ define void @loadv4i32() { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) - %data0 = call <4 x i32> @llvm.dx.resource.load.typedbuffer( + %data0 = call {<4 x i32>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 0) ret void @@ -172,7 +177,7 @@ define void @loadv4f16() { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) - %data0 = call <4 x half> @llvm.dx.resource.load.typedbuffer( + %data0 = call {<4 x half>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 0) ret void @@ -186,7 +191,7 @@ define void @loadv4i16() { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i16 @dx.op.bufferLoad.i16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) - %data0 = call <4 x i16> @llvm.dx.resource.load.typedbuffer( + %data0 = call {<4 x i16>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x i16>, 0, 0, 0) %buffer, i32 0) ret void diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll index 9b7e7fd04f605..8769e6ec66d8e 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll @@ -15,17 +15,19 @@ define void @load_float4(i32 %index, i32 %elemindex) { %ptr = call ptr @llvm.dx.resource.getpointer( target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VALUE:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) %vec_data = load <4 x float>, ptr %ptr call void @use_float4(<4 x float> %vec_data) - ; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VALUE:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0 ; CHECK: extractelement <4 x float> %[[VALUE]], i32 1 %y_ptr = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 1 %y_data = load float, ptr %y_ptr call void @use_float(float %y_data) - ; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VALUE:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0 ; CHECK: extractelement <4 x float> %[[VALUE]], i32 %elemindex %dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex %dyndata = load float, ptr %dynamic diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll index 17606408cadff..0b7882ac722e8 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll @@ -18,21 +18,24 @@ define void @store_float4(<4 x float> %data, i32 %index, i32 %elemindex) { ; Store just the .x component %scalar = extractelement <4 x float> %data, i32 0 - ; CHECK: %[[LOAD:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[LOAD]], float %scalar, i32 0 + ; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VEC:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0 + ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[VEC]], float %scalar, i32 0 ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %[[INSERT]]) store float %scalar, ptr %ptr ; Store just the .y component - ; CHECK: %[[LOAD:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[LOAD]], float %scalar, i32 1 + ; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VEC:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0 + ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[VEC]], float %scalar, i32 1 ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %[[INSERT]]) %y_ptr = getelementptr inbounds i8, ptr %ptr, i32 4 store float %scalar, ptr %y_ptr ; Store to one of the elements dynamically - ; CHECK: %[[LOAD:.*]] = call <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[LOAD]], float %scalar, i32 %elemindex + ; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VEC:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0 + ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[VEC]], float %scalar, i32 %elemindex ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %[[INSERT]]) %dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex store float %scalar, ptr %dynamic @@ -56,14 +59,16 @@ define void @store_half4(<4 x half> %data, i32 %index) { ; Store just the .x component %scalar = extractelement <4 x half> %data, i32 0 - ; CHECK: %[[LOAD:.*]] = call <4 x half> @llvm.dx.resource.load.typedbuffer.v4f16.tdx.TypedBuffer_v4f16_1_0_0t(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[INSERT:.*]] = insertelement <4 x half> %[[LOAD]], half %scalar, i32 0 + ; CHECK: %[[LOAD:.*]] = call { <4 x half>, i1 } @llvm.dx.resource.load.typedbuffer.v4f16.tdx.TypedBuffer_v4f16_1_0_0t(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VEC:.*]] = extractvalue { <4 x half>, i1 } %[[LOAD]], 0 + ; CHECK: %[[INSERT:.*]] = insertelement <4 x half> %[[VEC]], half %scalar, i32 0 ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f16_1_0_0t.v4f16(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index, <4 x half> %[[INSERT]]) store half %scalar, ptr %ptr ; Store just the .y component - ; CHECK: %[[LOAD:.*]] = call <4 x half> @llvm.dx.resource.load.typedbuffer.v4f16.tdx.TypedBuffer_v4f16_1_0_0t(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[INSERT:.*]] = insertelement <4 x half> %[[LOAD]], half %scalar, i32 1 + ; CHECK: %[[LOAD:.*]] = call { <4 x half>, i1 } @llvm.dx.resource.load.typedbuffer.v4f16.tdx.TypedBuffer_v4f16_1_0_0t(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VEC:.*]] = extractvalue { <4 x half>, i1 } %[[LOAD]], 0 + ; CHECK: %[[INSERT:.*]] = insertelement <4 x half> %[[VEC]], half %scalar, i32 1 ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f16_1_0_0t.v4f16(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index, <4 x half> %[[INSERT]]) %y_ptr = getelementptr inbounds i8, ptr %ptr, i32 2 store half %scalar, ptr %y_ptr @@ -87,14 +92,16 @@ define void @store_double2(<2 x double> %data, i32 %index) { ; Store just the .x component %scalar = extractelement <2 x double> %data, i32 0 - ; CHECK: %[[LOAD:.*]] = call <2 x double> @llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[INSERT:.*]] = insertelement <2 x double> %[[LOAD]], double %scalar, i32 0 + ; CHECK: %[[LOAD:.*]] = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VEC:.*]] = extractvalue { <2 x double>, i1 } %[[LOAD]], 0 + ; CHECK: %[[INSERT:.*]] = insertelement <2 x double> %[[VEC]], double %scalar, i32 0 ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v2f64(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index, <2 x double> %[[INSERT]]) store double %scalar, ptr %ptr ; Store just the .y component - ; CHECK: %[[LOAD:.*]] = call <2 x double> @llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index) - ; CHECK: %[[INSERT:.*]] = insertelement <2 x double> %[[LOAD]], double %scalar, i32 1 + ; CHECK: %[[LOAD:.*]] = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index) + ; CHECK: %[[VEC:.*]] = extractvalue { <2 x double>, i1 } %[[LOAD]], 0 + ; CHECK: %[[INSERT:.*]] = insertelement <2 x double> %[[VEC]], double %scalar, i32 1 ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v2f64(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index, <2 x double> %[[INSERT]]) %y_ptr = getelementptr inbounds i8, ptr %ptr, i32 8 store double %scalar, ptr %y_ptr diff --git a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll index c837b36a19e11..cd21adc11a9b4 100644 --- a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll +++ b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll @@ -29,18 +29,20 @@ entry: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() ; CHECK-NOT: load {{.*}} ptr @In %1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4 - ; CSE: call noundef <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t - %2 = call noundef <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0) + ; CSE: call noundef { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t + %load = call noundef {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0) + %2 = extractvalue {<4 x float>, i1} %load, 0 ; CHECK-NOT: load {{.*}} ptr @In %3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4 - %4 = call noundef <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0) + %load2 = call noundef {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0) + %4 = extractvalue {<4 x float>, i1} %load2, 0 %add.i = fadd <4 x float> %2, %4 call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i) ; CHECK: ret void ret void } -; CSE-DAG: declare <4 x float> @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]] +; CSE-DAG: declare { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]] ; CSE-DAG: declare void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]] attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll index 26223359dfdf1..060d54f961c70 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll @@ -17,8 +17,9 @@ target triple = "dxil-pc-shadermodel6.7-library" define <4 x float> @multicomponent() #0 { %res = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) - %val = call <4 x float> @llvm.dx.resource.load.typedbuffer( + %load = call {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %res, i32 0) + %val = extractvalue {<4 x float>, i1} %load, 0 ret <4 x float> %val } @@ -26,8 +27,9 @@ define <4 x float> @multicomponent() #0 { define float @onecomponent() #0 { %res = call target("dx.TypedBuffer", float, 1, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) - %val = call float @llvm.dx.resource.load.typedbuffer( + %load = call {float, i1} @llvm.dx.resource.load.typedbuffer( target("dx.TypedBuffer", float, 1, 0, 0) %res, i32 0) + %val = extractvalue {float, i1} %load, 0 ret float %val }