From 63f624e85c46de437f018035e295ca336e3079ce Mon Sep 17 00:00:00 2001 From: tyb0807 Date: Mon, 17 Nov 2025 08:11:40 -0600 Subject: [PATCH 1/4] [AMDGPU] Enable serializing of allocated preload kernarg SGPRs info - Support serialization of the number of allocated preload kernarg SGPRs - Support serialization of the first preload kernarg SGPR allocated Together they enable MIR that uses preloaded kernarg SGPRs to be reconstructed correctly. --- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 29 +++++ .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 20 +++- .../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 5 + .../AMDGPU/long-branch-reg-all-sgpr-used.ll | 2 + .../AMDGPU/machine-function-info-after-pei.ll | 1 + ...ine-function-info-long-branch-reg-debug.ll | 1 + .../machine-function-info-long-branch-reg.ll | 1 + .../AMDGPU/machine-function-info-no-ir.mir | 4 + .../MIR/AMDGPU/machine-function-info.ll | 4 + .../CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll | 108 ++++++++++++++++++ 10 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0346580ffa684..f0da640b08a01 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2027,6 +2027,35 @@ bool GCNTargetMachine::parseMachineFunctionInfo( MFI->ArgInfo.WorkItemIDZ, 0, 0))) return true; + // Parse FirstKernArgPreloadReg separately, since it's a Register, + // not ArgDescriptor. 
+ if (YamlMFI.ArgInfo && YamlMFI.ArgInfo->FirstKernArgPreloadReg) { + const auto &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg; + + if (!A.IsRegister) { + const MemoryBuffer &Buffer = + *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID()); + Error = + SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 0, + SourceMgr::DK_Error, + "firstKernArgPreloadReg must be a register", "", {}, {}); + return true; + } + + Register Reg; + if (parseNamedRegisterReference(PFS, Reg, A.RegisterName.Value, Error)) { + SourceRange = A.RegisterName.SourceRange; + return true; + } + + if (!AMDGPU::SGPR_32RegClass.contains(Reg)) + return diagnoseRegisterClass(A.RegisterName); + + MFI->ArgInfo.FirstKernArgPreloadReg = Reg; + + MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs; + } + if (ST.hasIEEEMode()) MFI->Mode.IEEE = YamlMFI.Mode.IEEE; if (ST.hasDX10ClampMode()) diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index b398db4f7caff..a716a99ee98e6 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -696,7 +696,6 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, return true; }; - // TODO: Need to serialize kernarg preloads. bool Any = false; Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer); Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr); @@ -718,6 +717,20 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY); Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ); + // Write FirstKernArgPreloadReg separately, since it's a Register, + // not ArgDescriptor. 
+ if (ArgInfo.FirstKernArgPreloadReg) { + Register Reg = ArgInfo.FirstKernArgPreloadReg; + if (Reg.isPhysical()) { + yaml::SIArgument SA = yaml::SIArgument::createArgument(true); + raw_string_ostream OS(SA.RegisterName.Value); + OS << printReg(Reg, &TRI); + + AI.FirstKernArgPreloadReg = SA; + Any = true; + } + } + if (Any) return AI; @@ -750,7 +763,8 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()), IsWholeWaveFunction(MFI.isWholeWaveFunction()), DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()), - ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) { + ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()), + NumKernargPreloadSGPRs(MFI.getNumKernargPreloadedSGPRs()) { for (Register Reg : MFI.getSGPRSpillPhysVGPRs()) SpillPhysVGPRS.push_back(regToString(Reg, TRI)); @@ -799,6 +813,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( ReturnsVoid = YamlMFI.ReturnsVoid; IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction; + UserSGPRInfo.allocKernargPreloadSGPRs(YamlMFI.NumKernargPreloadSGPRs); + if (YamlMFI.ScavengeFI) { auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); if (!FIOrErr) { diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index ca3c35067a923..d901f4c216551 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -170,6 +170,7 @@ struct SIArgumentInfo { std::optional<SIArgument> DispatchID; std::optional<SIArgument> FlatScratchInit; std::optional<SIArgument> PrivateSegmentSize; + std::optional<SIArgument> FirstKernArgPreloadReg; std::optional<SIArgument> WorkGroupIDX; std::optional<SIArgument> WorkGroupIDY; @@ -195,6 +196,7 @@ template <> struct MappingTraits { YamlIO.mapOptional("dispatchID", AI.DispatchID); YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit); YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize); + YamlIO.mapOptional("firstKernArgPreloadReg", 
AI.FirstKernArgPreloadReg); YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX); YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY); @@ -305,6 +307,8 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { unsigned DynamicVGPRBlockSize = 0; unsigned ScratchReservedForDynamicVGPRs = 0; + unsigned NumKernargPreloadSGPRs = 0; + SIMachineFunctionInfo() = default; SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, const TargetRegisterInfo &TRI, @@ -361,6 +365,7 @@ template <> struct MappingTraits { YamlIO.mapOptional("dynamicVGPRBlockSize", MFI.DynamicVGPRBlockSize, false); YamlIO.mapOptional("scratchReservedForDynamicVGPRs", MFI.ScratchReservedForDynamicVGPRs, 0); + YamlIO.mapOptional("numKernargPreloadSGPRs", MFI.NumKernargPreloadSGPRs, 0); YamlIO.mapOptional("isWholeWaveFunction", MFI.IsWholeWaveFunction, false); } }; diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll index ed8bc9ca700a8..d1fae8ae92a2a 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll @@ -48,6 +48,7 @@ ; CHECK-NEXT: hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define amdgpu_kernel void @long_branch_used_all_sgprs(ptr addrspace(1) %arg, i32 %cnd) #0 { @@ -320,6 +321,7 @@ ; CHECK-NEXT: hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define amdgpu_kernel void @long_branch_high_num_sgprs_used(ptr addrspace(1) %arg, i32 %cnd) #0 { diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll 
index 68c3d1b2f2972..3e4eaf0a3cd98 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -48,6 +48,7 @@ ; AFTER-PEI-NEXT: hasInitWholeWave: false ; AFTER-PEI-NEXT: dynamicVGPRBlockSize: 0 ; AFTER-PEI-NEXT: scratchReservedForDynamicVGPRs: 0 +; AFTER-PEI-NEXT: numKernargPreloadSGPRs: 0 ; AFTER-PEI-NEXT: isWholeWaveFunction: false ; AFTER-PEI-NEXT: body: define amdgpu_kernel void @scavenge_fi(ptr addrspace(1) %out, i32 %in) #0 { diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll index 55598ec70d953..2d820102e8706 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll @@ -48,6 +48,7 @@ ; CHECK-NEXT: hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define amdgpu_kernel void @uniform_long_forward_branch_debug(ptr addrspace(1) %arg, i32 %arg1) #0 !dbg !5 { diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll index 2326b2dc09b58..c949a3d94c6a3 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll @@ -48,6 +48,7 @@ ; CHECK-NEXT: hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) #0 { diff --git 
a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 0cb9bc095bc50..87c3eb626ef0d 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -57,6 +57,7 @@ # FULL-NEXT: hasInitWholeWave: false # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 +# FULL-NEXT: numKernargPreloadSGPRs: 0 # FULL-NEXT: isWholeWaveFunction: false # FULL-NEXT: body: @@ -167,6 +168,7 @@ body: | # FULL-NEXT: hasInitWholeWave: false # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 +# FULL-NEXT: numKernargPreloadSGPRs: 0 # FULL-NEXT: isWholeWaveFunction: false # FULL-NEXT: body: @@ -248,6 +250,7 @@ body: | # FULL-NEXT: hasInitWholeWave: false # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 +# FULL-NEXT: numKernargPreloadSGPRs: 0 # FULL-NEXT: isWholeWaveFunction: false # FULL-NEXT: body: @@ -330,6 +333,7 @@ body: | # FULL-NEXT: hasInitWholeWave: false # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 +# FULL-NEXT: numKernargPreloadSGPRs: 0 # FULL-NEXT: isWholeWaveFunction: false # FULL-NEXT: body: diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index ab4383b675243..ab3c0335f8ea9 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -58,6 +58,7 @@ ; CHECK-NEXT: hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { @@ -110,6 +111,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: 
hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { @@ -186,6 +188,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { ; CHECK-NEXT: hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define void @function() { @@ -244,6 +247,7 @@ define void @function() { ; CHECK-NEXT: hasInitWholeWave: false ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 +; CHECK-NEXT: numKernargPreloadSGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false ; CHECK-NEXT: body: define void @function_nsz() #0 { diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll new file mode 100644 index 0000000000000..de9a268805995 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll @@ -0,0 +1,108 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -stop-after=amdgpu-isel %s -o - | FileCheck --check-prefix=MIR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -stop-after=amdgpu-isel -o %t.mir %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %t.mir -o - | FileCheck --check-prefix=ASM %s + +; Test that kernarg preloading information is correctly serialized to MIR and +; can be round-tripped through MIR serialization/deserialization. 
+ +; MIR-LABEL: name: kernarg_preload_single_arg +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 1 + +; ASM-LABEL: kernarg_preload_single_arg: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 1 +; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0 +define amdgpu_kernel void @kernarg_preload_single_arg(i32 inreg %arg0) { +entry: + %val = add i32 %arg0, 1 + store i32 %val, ptr addrspace(1) null + ret void +} + +; MIR-LABEL: name: kernarg_preload_multiple_args_unaligned +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 5 + +; ASM-LABEL: kernarg_preload_multiple_args_unaligned: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 5 +; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0 +define amdgpu_kernel void @kernarg_preload_multiple_args_unaligned(i32 inreg %arg0, i64 inreg %arg1, i32 inreg %arg2) { +entry: + %val = add i32 %arg0, %arg2 + store i32 %val, ptr addrspace(1) null + ret void +} + +; MIR-LABEL: name: kernarg_preload_multiple_args_aligned +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 4 + +; ASM-LABEL: kernarg_preload_multiple_args_aligned: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 4 +; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0 +define amdgpu_kernel void @kernarg_preload_multiple_args_aligned(i64 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2) { +entry: + %val = add i32 %arg1, %arg2 + store i32 %val, ptr addrspace(1) null + ret void +} + +; MIR-LABEL: name: kernarg_preload_with_ptr +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: 
numKernargPreloadSGPRs: 2 + +; ASM-LABEL: kernarg_preload_with_ptr: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2 +; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0 +define amdgpu_kernel void @kernarg_preload_with_ptr(ptr inreg %ptr) { +entry: + %val = load i32, ptr %ptr + %add = add i32 %val, 1 + store i32 %add, ptr %ptr + ret void +} + +; MIR-LABEL: name: kernarg_no_preload +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR-NOT: firstKernArgPreloadReg +; MIR: numKernargPreloadSGPRs: 0 + +; ASM-LABEL: kernarg_no_preload: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 0 +define amdgpu_kernel void @kernarg_no_preload(i32 %arg0) { +entry: + %val = add i32 %arg0, 1 + store i32 %val, ptr addrspace(1) null + ret void +} + +; MIR-LABEL: name: kernarg_preload_mixed +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 2 + +; ASM-LABEL: kernarg_preload_mixed: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2 +define amdgpu_kernel void @kernarg_preload_mixed(i32 inreg %arg0, i32 inreg %arg1, i32 %arg2) { +entry: + %val = add i32 %arg0, %arg1 + %val2 = add i32 %val, %arg2 + store i32 %val2, ptr addrspace(1) null + ret void +} From 13a6ac79260dac26ce17a0a017e0383a1f8e8fb1 Mon Sep 17 00:00:00 2001 From: tyb0807 Date: Wed, 19 Nov 2025 17:31:51 -0600 Subject: [PATCH 2/4] Address comments --- .../CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll index de9a268805995..d96dcfc52e1b0 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll @@ -106,3 +106,149 @@ entry: store i32 %val2, ptr addrspace(1) null ret void } + +; MIR-LABEL: name: 
kernarg_preload_with_dispatch_ptr +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: dispatchPtr: { reg: '$sgpr0_sgpr1' } +; MIR: kernargSegmentPtr: { reg: '$sgpr2_sgpr3' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr4' } +; MIR: numKernargPreloadSGPRs: 2 + +; ASM-LABEL: kernarg_preload_with_dispatch_ptr: +; ASM: .amdhsa_user_sgpr_dispatch_ptr 1 +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2 + +define amdgpu_kernel void @kernarg_preload_with_dispatch_ptr(i64 inreg %arg0) #0 { +entry: + %val = add i64 %arg0, 1 + store i64 %val, ptr addrspace(1) null + ret void +} + +attributes #0 = { "amdgpu-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-dispatch-id" } + +; MIR-LABEL: name: kernarg_preload_with_queue_ptr +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: queuePtr: { reg: '$sgpr0_sgpr1' } +; MIR: kernargSegmentPtr: { reg: '$sgpr2_sgpr3' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr4' } +; MIR: numKernargPreloadSGPRs: 1 + +; ASM-LABEL: kernarg_preload_with_queue_ptr: +; ASM: .amdhsa_user_sgpr_queue_ptr 1 +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 1 + +define amdgpu_kernel void @kernarg_preload_with_queue_ptr(i32 inreg %arg0) #1 { +entry: + %val = add i32 %arg0, 1 + store i32 %val, ptr addrspace(1) null + ret void +} + +attributes #1 = { "amdgpu-queue-ptr" "amdgpu-no-dispatch-ptr" "amdgpu-no-dispatch-id" } + +; MIR-LABEL: name: kernarg_preload_with_multiple_user_sgprs +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: dispatchPtr: { reg: '$sgpr0_sgpr1' } +; MIR: queuePtr: { reg: '$sgpr2_sgpr3' } +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR: dispatchID: { reg: '$sgpr6_sgpr7' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 2 + +; ASM-LABEL: kernarg_preload_with_multiple_user_sgprs: +; ASM: .amdhsa_user_sgpr_dispatch_ptr 1 +; ASM: .amdhsa_user_sgpr_queue_ptr 1 +; ASM: .amdhsa_user_sgpr_dispatch_id 1 +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2 + +define amdgpu_kernel void 
@kernarg_preload_with_multiple_user_sgprs(i64 inreg %arg0) #2 { +entry: + %val = add i64 %arg0, 1 + store i64 %val, ptr addrspace(1) null + ret void +} + +attributes #2 = { "amdgpu-dispatch-ptr" "amdgpu-queue-ptr" "amdgpu-dispatch-id" } + +; MIR-LABEL: name: kernarg_preload_without_user_sgprs +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: kernargSegmentPtr: { reg: '$sgpr0_sgpr1' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr2' } +; MIR: numKernargPreloadSGPRs: 1 + +; ASM-LABEL: kernarg_preload_without_user_sgprs: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 1 + +define amdgpu_kernel void @kernarg_preload_without_user_sgprs(i32 inreg %arg0) #3 { +entry: + %val = add i32 %arg0, 1 + store i32 %val, ptr addrspace(1) null + ret void +} + +attributes #3 = { "amdgpu-no-queue-ptr" "amdgpu-no-dispatch-ptr" "amdgpu-no-dispatch-id" } + +; MIR-LABEL: name: kernarg_preload_max_args +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: dispatchPtr: { reg: '$sgpr0_sgpr1' } +; MIR: queuePtr: { reg: '$sgpr2_sgpr3' } +; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; MIR: dispatchID: { reg: '$sgpr6_sgpr7' } +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 8 + +; ASM-LABEL: kernarg_preload_max_args: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 8 + +define amdgpu_kernel void @kernarg_preload_max_args( + i32 inreg %a0, i32 inreg %a1, i32 inreg %a2, i32 inreg %a3, + i32 inreg %a4, i32 inreg %a5, i32 inreg %a6, i32 inreg %a7, + i32 inreg %a8, i32 inreg %a9, i32 inreg %a10, i32 inreg %a11, + i32 inreg %a12, i32 inreg %a13, i32 inreg %a14, i32 inreg %a15) { +entry: + ret void +} + +; MIR-LABEL: name: kernarg_preload_mixed_inreg_and_stack +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 2 + +; ASM-LABEL: kernarg_preload_mixed_inreg_and_stack: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2 + +define amdgpu_kernel void 
@kernarg_preload_mixed_inreg_and_stack( + i32 inreg %preload0, + i32 inreg %preload1, + i32 %stack0, + i32 %stack1) { +entry: + %val = add i32 %preload0, %preload1 + %val2 = add i32 %val, %stack0 + %val3 = add i32 %val2, %stack1 + store i32 %val3, ptr addrspace(1) null + ret void +} + +; MIR-LABEL: name: kernarg_preload_vector_types +; MIR: machineFunctionInfo: +; MIR: argumentInfo: +; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' } +; MIR: numKernargPreloadSGPRs: 4 + +; ASM-LABEL: kernarg_preload_vector_types: +; ASM: .amdhsa_user_sgpr_kernarg_preload_length 4 + +define amdgpu_kernel void @kernarg_preload_vector_types(<4 x i32> inreg %vec) { +entry: + %elem = extractelement <4 x i32> %vec, i32 0 + store i32 %elem, ptr addrspace(1) null + ret void +} From d6b6f6aa2afcafcbff7ab533be120797a7377b08 Mon Sep 17 00:00:00 2001 From: tyb0807 Date: Thu, 20 Nov 2025 17:36:27 -0600 Subject: [PATCH 3/4] More tests + address comments --- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 19 +++++++++++++------ .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 14 +++++++------- ...d-kernarg-invalid-register-class-error.mir | 18 ++++++++++++++++++ ...ad-kernarg-invalid-register-name-error.mir | 19 +++++++++++++++++++ .../preload-kernarg-stack-type-error.mir | 18 ++++++++++++++++++ 5 files changed, 75 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index f0da640b08a01..413b4bc5c4843 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2030,15 +2030,23 @@ bool GCNTargetMachine::parseMachineFunctionInfo( // Parse FirstKernArgPreloadReg separately, since it's a 
Register, // not ArgDescriptor. if (YamlMFI.ArgInfo && YamlMFI.ArgInfo->FirstKernArgPreloadReg) { - const auto &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg; + const yaml::SIArgument &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg; if (!A.IsRegister) { + // For stack arguments, we don't have RegisterName.SourceRange, + // but we should have some location info from the YAML parser const MemoryBuffer &Buffer = *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID()); - Error = - SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 0, - SourceMgr::DK_Error, - "firstKernArgPreloadReg must be a register", "", {}, {}); + // Create a minimal valid source range + SMLoc Loc = SMLoc::getFromPointer(Buffer.getBufferStart()); + SMRange Range(Loc, Loc); + + Error = SMDiagnostic(*PFS.SM, Loc, Buffer.getBufferIdentifier(), 1, 0, + SourceMgr::DK_Error, + "firstKernArgPreloadReg must be a register, not a stack location", + "", {}, {}); + + SourceRange = Range; return true; } @@ -2052,7 +2060,6 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return diagnoseRegisterClass(A.RegisterName); MFI->ArgInfo.FirstKernArgPreloadReg = Reg; - MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index a716a99ee98e6..33e3b18a43a12 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -721,14 +721,14 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, // not ArgDescriptor. 
if (ArgInfo.FirstKernArgPreloadReg) { Register Reg = ArgInfo.FirstKernArgPreloadReg; - if (Reg.isPhysical()) { - yaml::SIArgument SA = yaml::SIArgument::createArgument(true); - raw_string_ostream OS(SA.RegisterName.Value); - OS << printReg(Reg, &TRI); + assert(Reg.isPhysical() && "FirstKernArgPreloadReg must be a physical register"); - AI.FirstKernArgPreloadReg = SA; - Any = true; - } + yaml::SIArgument SA = yaml::SIArgument::createArgument(true); + raw_string_ostream OS(SA.RegisterName.Value); + OS << printReg(Reg, &TRI); + + AI.FirstKernArgPreloadReg = SA; + Any = true; } if (Any) diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir new file mode 100644 index 0000000000000..10391dbaa9ba8 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir @@ -0,0 +1,18 @@ +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s + +--- +# CHECK: error: {{.*}} incorrect register class for field +name: kernarg_preload_wrong_register_class +tracksRegLiveness: true +machineFunctionInfo: + explicitKernArgSize: 4 + maxKernArgAlign: 4 + numKernargPreloadSGPRs: 1 + isEntryFunction: true + argumentInfo: + kernargSegmentPtr: { reg: '$sgpr0_sgpr1' } + firstKernArgPreloadReg: { reg: '$vgpr0' } # ERROR: VGPR instead of SGPR +body: | + bb.0: + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir new file mode 100644 index 0000000000000..dbf739c41003b --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir @@ -0,0 +1,19 @@ +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s + +--- +# CHECK: Invalid register name +name: kernarg_preload_invalid_register_name +tracksRegLiveness: true +machineFunctionInfo: + explicitKernArgSize: 4 + maxKernArgAlign: 4 + numKernargPreloadSGPRs: 1 + isEntryFunction: true + argumentInfo: + kernargSegmentPtr: { reg: '$sgpr0_sgpr1' } + firstKernArgPreloadReg: { reg: '$invalid_reg' } # ERROR: Invalid register name +body: | + bb.0: + S_ENDPGM 0 +... + diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir new file mode 100644 index 0000000000000..de2d8f586c2c1 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir @@ -0,0 +1,18 @@ +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s + +--- +# CHECK: error: {{.*}} firstKernArgPreloadReg must be a register, not a stack location +name: kernarg_preload_stack_argument +tracksRegLiveness: true +machineFunctionInfo: + explicitKernArgSize: 4 + maxKernArgAlign: 4 + numKernargPreloadSGPRs: 1 + isEntryFunction: true + argumentInfo: + kernargSegmentPtr: { reg: '$sgpr0_sgpr1' } + firstKernArgPreloadReg: { offset: 0 } # ERROR: Stack instead of register +body: | + bb.0: + S_ENDPGM 0 +... 
From 227220351cbd1945a4bb04cea95816cdbdaf068a Mon Sep 17 00:00:00 2001 From: tyb0807 Date: Sat, 22 Nov 2025 14:58:32 -0600 Subject: [PATCH 4/4] Format + review comments --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 12 ++++++------ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 3 ++- .../preload-kernarg-invalid-register-class-error.mir | 2 +- .../preload-kernarg-invalid-register-name-error.mir | 2 +- .../MIR/AMDGPU/preload-kernarg-stack-type-error.mir | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 413b4bc5c4843..05e52d043c3b6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2040,12 +2040,12 @@ bool GCNTargetMachine::parseMachineFunctionInfo( // Create a minimal valid source range SMLoc Loc = SMLoc::getFromPointer(Buffer.getBufferStart()); SMRange Range(Loc, Loc); - - Error = SMDiagnostic(*PFS.SM, Loc, Buffer.getBufferIdentifier(), 1, 0, - SourceMgr::DK_Error, - "firstKernArgPreloadReg must be a register, not a stack location", - "", {}, {}); - + + Error = SMDiagnostic( + *PFS.SM, Loc, Buffer.getBufferIdentifier(), 1, 0, SourceMgr::DK_Error, + "firstKernArgPreloadReg must be a register, not a stack location", "", + {}, {}); + SourceRange = Range; return true; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 33e3b18a43a12..9abda275d7e42 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -721,7 +721,8 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, // not ArgDescriptor. 
if (ArgInfo.FirstKernArgPreloadReg) { Register Reg = ArgInfo.FirstKernArgPreloadReg; - assert(Reg.isPhysical() && "FirstKernArgPreloadReg must be a physical register"); + assert(Reg.isPhysical() && + "FirstKernArgPreloadReg must be a physical register"); yaml::SIArgument SA = yaml::SIArgument::createArgument(true); raw_string_ostream OS(SA.RegisterName.Value); diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir index 10391dbaa9ba8..e7c1740711952 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir @@ -1,4 +1,4 @@ -# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s --- # CHECK: error: {{.*}} incorrect register class for field diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir index dbf739c41003b..c74a437b5221c 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir @@ -1,4 +1,4 @@ -# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s --- # CHECK: Invalid register name diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir index de2d8f586c2c1..19076044170fd 100644 --- 
a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir @@ -1,4 +1,4 @@ -# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s --- # CHECK: error: {{.*}} firstKernArgPreloadReg must be a register, not a stack location