From b5f7cac07a2c30c8ee9e730e53ba2d946e4a4d7d Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Fri, 2 May 2025 17:24:17 +0000 Subject: [PATCH 1/2] [AMDGPU] Add make.buffer.rsrc to InferAddressSpaces make.buffer.rsrc can be subjected to address space inference. There's not _currently_ a reason to have this, but we might as well handle this in case it comes up. --- .../Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 11 +++++++++++ .../InferAddressSpaces/AMDGPU/mem-intrinsics.ll | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 05e1aa02d2703..86a6e49fce027 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1044,6 +1044,7 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_load_to_lds: + case Intrinsic::amdgcn_make_buffer_rsrc: OpIndexes.push_back(0); return true; default: @@ -1124,6 +1125,16 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, II->setCalledFunction(NewDecl); return II; } + case Intrinsic::amdgcn_make_buffer_rsrc: { + Type *SrcTy = NewV->getType(); + Type *DstTy = II->getType(); + Module *M = II->getModule(); + Function *NewDecl = Intrinsic::getOrInsertDeclaration( + M, II->getIntrinsicID(), {DstTy, SrcTy}); + II->setArgOperand(0, NewV); + II->setCalledFunction(NewDecl); + return II; + } default: return nullptr; } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index 9f67859a3f147..d6e79dafc9fcd 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -192,6 +192,20 @@ define amdgpu_kernel void 
@load_to_lds_fat_pointer_as_flat(ptr addrspace(7) %buf ret void } +define amdgpu_kernel void @make_buffer_rsrc_global_as_flat(ptr addrspace(1) %global, i32 %extent) { + ;; NOTE: flags value not representative of real input +; CHECK-LABEL: define amdgpu_kernel void @make_buffer_rsrc_global_as_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL:%.*]], i32 [[EXTENT:%.*]]) { +; CHECK-NEXT: [[BUFFER_FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[GLOBAL]], i16 0, i32 [[EXTENT]], i32 0) +; CHECK-NEXT: store i32 [[EXTENT]], ptr addrspace(7) [[BUFFER_FAT_PTR]], align 4 +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %global to ptr + %buffer.fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p9(ptr %cast, i16 0, i32 %extent, i32 0) + store i32 %extent, ptr addrspace(7) %buffer.fat.ptr + ret void +} + declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1 declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1 @@ -199,6 +213,8 @@ declare void @llvm.memcpy.p0.p3.i32(ptr nocapture writeonly, ptr addrspace(3) no declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1 declare void @llvm.amdgcn.load.to.lds.p0(ptr nocapture readonly, ptr addrspace(3) nocapture writeonly, i32 immarg, i32 immarg, i32 immarg) #1 +declare void @llvm.amdgcn.make.buffer.rsrc.p0(ptr nocapture readnone, i16, i32, i32) #0 + attributes #0 = { nounwind } attributes #1 = { argmemonly nounwind } From e08a2e152ac07f4f5a790610f9e4e62eadf8541d Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Tue, 20 May 2025 11:29:09 -0700 Subject: [PATCH 2/2] Fix test typos that somehow still let things work Co-authored-by: Matt Arsenault --- .../Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index d6e79dafc9fcd..57453d63d7e8a 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -201,7 +201,7 @@ define amdgpu_kernel void @make_buffer_rsrc_global_as_flat(ptr addrspace(1) %glo ; CHECK-NEXT: ret void ; %cast = addrspacecast ptr addrspace(1) %global to ptr - %buffer.fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p9(ptr %cast, i16 0, i32 %extent, i32 0) + %buffer.fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %cast, i16 0, i32 %extent, i32 0) store i32 %extent, ptr addrspace(7) %buffer.fat.ptr ret void } @@ -213,7 +213,7 @@ declare void @llvm.memcpy.p0.p3.i32(ptr nocapture writeonly, ptr addrspace(3) no declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1 declare void @llvm.amdgcn.load.to.lds.p0(ptr nocapture readonly, ptr addrspace(3) nocapture writeonly, i32 immarg, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.make.buffer.rsrc.p0(ptr nocapture readnone, i16, i32, i32) #0 +declare ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr readnone, i16, i32, i32) #0 attributes #0 = { nounwind } attributes #1 = { argmemonly nounwind }